mirror of https://github.com/apache/lucene.git
SOLR-9185: Solr's edismax and Lucene/standard query parsers should optionally not split on whitespace before sending terms to analysis
This commit is contained in:
parent
4ee7fc3890
commit
d1b2fb33ef
|
@ -202,6 +202,13 @@ New Features
|
||||||
requires "stored" and must not be multiValued. It's intended for fields that might have very large values so that
|
requires "stored" and must not be multiValued. It's intended for fields that might have very large values so that
|
||||||
they don't get cached in memory. (David Smiley)
|
they don't get cached in memory. (David Smiley)
|
||||||
|
|
||||||
|
* SOLR-9185: Solr's edismax and "Lucene"/standard query parsers will no longer split on whitespace before sending
|
||||||
|
terms to analysis, if given the "sow=false" request param ("sow"=>"split on whitespace"). This enables multi-term
|
||||||
|
source synonyms to match at query-time using SynonymGraphFilterFactory; other analysis components will also now
|
||||||
|
work at query time, e.g. ShingleFilterFactory. By default, and when the "sow=true" param is specified, these
|
||||||
|
parsers' behavior remains the same: queries will be split on whitespace before sending individual terms to analysis.
|
||||||
|
(Steve Rowe)
|
||||||
|
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
----------------------
|
----------------------
|
||||||
|
|
|
@ -3,13 +3,17 @@ package org.apache.solr.parser;
|
||||||
|
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
import org.apache.solr.search.QParser;
|
|
||||||
import org.apache.solr.search.SyntaxError;
|
import org.apache.solr.search.SyntaxError;
|
||||||
|
import org.apache.solr.search.QParser;
|
||||||
|
import org.apache.solr.search.QueryParserConfigurationException;
|
||||||
|
|
||||||
|
|
||||||
public class QueryParser extends SolrQueryParserBase implements QueryParserConstants {
|
public class QueryParser extends SolrQueryParserBase implements QueryParserConstants {
|
||||||
|
@ -17,9 +21,44 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
*/
|
*/
|
||||||
static public enum Operator { OR, AND }
|
static public enum Operator { OR, AND }
|
||||||
|
|
||||||
public QueryParser(Version matchVersion, String defaultField, QParser parser) {
|
/** default split on whitespace behavior */
|
||||||
|
public static final boolean DEFAULT_SPLIT_ON_WHITESPACE = true;
|
||||||
|
|
||||||
|
public QueryParser(String defaultField, QParser parser) {
|
||||||
this(new FastCharStream(new StringReader("")));
|
this(new FastCharStream(new StringReader("")));
|
||||||
init(matchVersion, defaultField, parser);
|
init(defaultField, parser);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @see #setSplitOnWhitespace(boolean)
|
||||||
|
*/
|
||||||
|
public boolean getSplitOnWhitespace() {
|
||||||
|
return splitOnWhitespace;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether query text should be split on whitespace prior to analysis.
|
||||||
|
* Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
|
||||||
|
*/
|
||||||
|
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
|
||||||
|
this.splitOnWhitespace = splitOnWhitespace;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE;
|
||||||
|
private static Set<Integer> disallowedPostMultiTerm
|
||||||
|
= new HashSet<Integer>(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR));
|
||||||
|
private static boolean allowedPostMultiTerm(int tokenKind) {
|
||||||
|
return disallowedPostMultiTerm.contains(tokenKind) == false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
|
||||||
|
boolean quoted, boolean fieldAutoGenPhraseQueries) throws SyntaxError {
|
||||||
|
if ((getAutoGeneratePhraseQueries() || fieldAutoGenPhraseQueries) && splitOnWhitespace == false) {
|
||||||
|
throw new QueryParserConfigurationException
|
||||||
|
("Field '" + field + "': autoGeneratePhraseQueries == true is disallowed when sow/splitOnWhitespace == false");
|
||||||
|
}
|
||||||
|
return super.newFieldQuery(analyzer, field, queryText, quoted, fieldAutoGenPhraseQueries);
|
||||||
}
|
}
|
||||||
|
|
||||||
// * Query ::= ( Clause )*
|
// * Query ::= ( Clause )*
|
||||||
|
@ -96,13 +135,38 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
|
|
||||||
final public Query Query(String field) throws ParseException, SyntaxError {
|
final public Query Query(String field) throws ParseException, SyntaxError {
|
||||||
List<BooleanClause> clauses = new ArrayList<BooleanClause>();
|
List<BooleanClause> clauses = new ArrayList<BooleanClause>();
|
||||||
Query q, firstQuery=null;
|
Query q;
|
||||||
int conj, mods;
|
int conj, mods;
|
||||||
mods = Modifiers();
|
if (jj_2_1(2)) {
|
||||||
q = Clause(field);
|
MultiTerm(field, clauses);
|
||||||
addClause(clauses, CONJ_NONE, mods, q);
|
} else {
|
||||||
if (mods == MOD_NONE)
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
firstQuery=q;
|
case NOT:
|
||||||
|
case PLUS:
|
||||||
|
case MINUS:
|
||||||
|
case BAREOPER:
|
||||||
|
case LPAREN:
|
||||||
|
case STAR:
|
||||||
|
case QUOTED:
|
||||||
|
case TERM:
|
||||||
|
case PREFIXTERM:
|
||||||
|
case WILDTERM:
|
||||||
|
case REGEXPTERM:
|
||||||
|
case RANGEIN_START:
|
||||||
|
case RANGEEX_START:
|
||||||
|
case LPARAMS:
|
||||||
|
case FILTER:
|
||||||
|
case NUMBER:
|
||||||
|
mods = Modifiers();
|
||||||
|
q = Clause(field);
|
||||||
|
addClause(clauses, CONJ_NONE, mods, q);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
jj_la1[4] = jj_gen;
|
||||||
|
jj_consume_token(-1);
|
||||||
|
throw new ParseException();
|
||||||
|
}
|
||||||
|
}
|
||||||
label_1:
|
label_1:
|
||||||
while (true) {
|
while (true) {
|
||||||
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
|
@ -127,19 +191,50 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
;
|
;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
jj_la1[4] = jj_gen;
|
jj_la1[5] = jj_gen;
|
||||||
break label_1;
|
break label_1;
|
||||||
}
|
}
|
||||||
conj = Conjunction();
|
if (jj_2_2(2)) {
|
||||||
mods = Modifiers();
|
MultiTerm(field, clauses);
|
||||||
q = Clause(field);
|
} else {
|
||||||
addClause(clauses, conj, mods, q);
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
}
|
case AND:
|
||||||
if (clauses.size() == 1 && firstQuery != null)
|
case OR:
|
||||||
{if (true) return rawToNormal(firstQuery);}
|
case NOT:
|
||||||
else {
|
case PLUS:
|
||||||
{if (true) return getBooleanQuery(clauses);}
|
case MINUS:
|
||||||
|
case BAREOPER:
|
||||||
|
case LPAREN:
|
||||||
|
case STAR:
|
||||||
|
case QUOTED:
|
||||||
|
case TERM:
|
||||||
|
case PREFIXTERM:
|
||||||
|
case WILDTERM:
|
||||||
|
case REGEXPTERM:
|
||||||
|
case RANGEIN_START:
|
||||||
|
case RANGEEX_START:
|
||||||
|
case LPARAMS:
|
||||||
|
case FILTER:
|
||||||
|
case NUMBER:
|
||||||
|
conj = Conjunction();
|
||||||
|
mods = Modifiers();
|
||||||
|
q = Clause(field);
|
||||||
|
addClause(clauses, conj, mods, q);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
jj_la1[6] = jj_gen;
|
||||||
|
jj_consume_token(-1);
|
||||||
|
throw new ParseException();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
if (clauses.size() == 1 && clauses.get(0).getOccur() == BooleanClause.Occur.SHOULD) {
|
||||||
|
Query firstQuery = clauses.get(0).getQuery();
|
||||||
|
if ( ! (firstQuery instanceof RawQuery) || ((RawQuery)firstQuery).getTermCount() == 1) {
|
||||||
|
{if (true) return rawToNormal(firstQuery);}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{if (true) return getBooleanQuery(clauses);}
|
||||||
throw new Error("Missing return statement in function");
|
throw new Error("Missing return statement in function");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -148,20 +243,20 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
Token fieldToken=null, boost=null;
|
Token fieldToken=null, boost=null;
|
||||||
Token localParams=null;
|
Token localParams=null;
|
||||||
int flags = 0;
|
int flags = 0;
|
||||||
if (jj_2_1(2)) {
|
if (jj_2_3(2)) {
|
||||||
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
case TERM:
|
case TERM:
|
||||||
fieldToken = jj_consume_token(TERM);
|
fieldToken = jj_consume_token(TERM);
|
||||||
jj_consume_token(COLON);
|
jj_consume_token(COLON);
|
||||||
field=discardEscapeChar(fieldToken.image);
|
field = discardEscapeChar(fieldToken.image);
|
||||||
break;
|
break;
|
||||||
case STAR:
|
case STAR:
|
||||||
jj_consume_token(STAR);
|
jj_consume_token(STAR);
|
||||||
jj_consume_token(COLON);
|
jj_consume_token(COLON);
|
||||||
field="*";
|
field = "*";
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
jj_la1[5] = jj_gen;
|
jj_la1[7] = jj_gen;
|
||||||
jj_consume_token(-1);
|
jj_consume_token(-1);
|
||||||
throw new ParseException();
|
throw new ParseException();
|
||||||
}
|
}
|
||||||
|
@ -191,7 +286,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
boost = jj_consume_token(NUMBER);
|
boost = jj_consume_token(NUMBER);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
jj_la1[6] = jj_gen;
|
jj_la1[8] = jj_gen;
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -206,10 +301,10 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
boost = jj_consume_token(NUMBER);
|
boost = jj_consume_token(NUMBER);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
jj_la1[7] = jj_gen;
|
jj_la1[9] = jj_gen;
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
q=getFilter(q); restoreFlags(flags);
|
q=getFilter(q); restoreFlags(flags);
|
||||||
break;
|
break;
|
||||||
case LPARAMS:
|
case LPARAMS:
|
||||||
localParams = jj_consume_token(LPARAMS);
|
localParams = jj_consume_token(LPARAMS);
|
||||||
|
@ -219,17 +314,17 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
boost = jj_consume_token(NUMBER);
|
boost = jj_consume_token(NUMBER);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
jj_la1[8] = jj_gen;
|
jj_la1[10] = jj_gen;
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
q=getLocalParams(field, localParams.image);
|
q=getLocalParams(field, localParams.image);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
jj_la1[9] = jj_gen;
|
jj_la1[11] = jj_gen;
|
||||||
jj_consume_token(-1);
|
jj_consume_token(-1);
|
||||||
throw new ParseException();
|
throw new ParseException();
|
||||||
}
|
}
|
||||||
{if (true) return handleBoost(q, boost);}
|
{if (true) return handleBoost(q, boost);}
|
||||||
throw new Error("Missing return statement in function");
|
throw new Error("Missing return statement in function");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -278,35 +373,48 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
term.image = term.image.substring(0,1);
|
term.image = term.image.substring(0,1);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
jj_la1[10] = jj_gen;
|
jj_la1[12] = jj_gen;
|
||||||
jj_consume_token(-1);
|
jj_consume_token(-1);
|
||||||
throw new ParseException();
|
throw new ParseException();
|
||||||
}
|
}
|
||||||
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
case FUZZY_SLOP:
|
|
||||||
fuzzySlop = jj_consume_token(FUZZY_SLOP);
|
|
||||||
fuzzy=true;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
jj_la1[11] = jj_gen;
|
|
||||||
;
|
|
||||||
}
|
|
||||||
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
|
||||||
case CARAT:
|
case CARAT:
|
||||||
jj_consume_token(CARAT);
|
case FUZZY_SLOP:
|
||||||
boost = jj_consume_token(NUMBER);
|
|
||||||
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
|
case CARAT:
|
||||||
|
jj_consume_token(CARAT);
|
||||||
|
boost = jj_consume_token(NUMBER);
|
||||||
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
|
case FUZZY_SLOP:
|
||||||
|
fuzzySlop = jj_consume_token(FUZZY_SLOP);
|
||||||
|
fuzzy=true;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
jj_la1[13] = jj_gen;
|
||||||
|
;
|
||||||
|
}
|
||||||
|
break;
|
||||||
case FUZZY_SLOP:
|
case FUZZY_SLOP:
|
||||||
fuzzySlop = jj_consume_token(FUZZY_SLOP);
|
fuzzySlop = jj_consume_token(FUZZY_SLOP);
|
||||||
fuzzy=true;
|
fuzzy=true;
|
||||||
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
|
case CARAT:
|
||||||
|
jj_consume_token(CARAT);
|
||||||
|
boost = jj_consume_token(NUMBER);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
jj_la1[14] = jj_gen;
|
||||||
|
;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
jj_la1[12] = jj_gen;
|
jj_la1[15] = jj_gen;
|
||||||
;
|
jj_consume_token(-1);
|
||||||
|
throw new ParseException();
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
jj_la1[13] = jj_gen;
|
jj_la1[16] = jj_gen;
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
q = handleBareTokenQuery(getField(field), term, fuzzySlop, prefix, wildcard, fuzzy, regexp);
|
q = handleBareTokenQuery(getField(field), term, fuzzySlop, prefix, wildcard, fuzzy, regexp);
|
||||||
|
@ -316,13 +424,13 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
case RANGEIN_START:
|
case RANGEIN_START:
|
||||||
jj_consume_token(RANGEIN_START);
|
jj_consume_token(RANGEIN_START);
|
||||||
startInc=true;
|
startInc = true;
|
||||||
break;
|
break;
|
||||||
case RANGEEX_START:
|
case RANGEEX_START:
|
||||||
jj_consume_token(RANGEEX_START);
|
jj_consume_token(RANGEEX_START);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
jj_la1[14] = jj_gen;
|
jj_la1[17] = jj_gen;
|
||||||
jj_consume_token(-1);
|
jj_consume_token(-1);
|
||||||
throw new ParseException();
|
throw new ParseException();
|
||||||
}
|
}
|
||||||
|
@ -334,7 +442,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
goop1 = jj_consume_token(RANGE_QUOTED);
|
goop1 = jj_consume_token(RANGE_QUOTED);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
jj_la1[15] = jj_gen;
|
jj_la1[18] = jj_gen;
|
||||||
jj_consume_token(-1);
|
jj_consume_token(-1);
|
||||||
throw new ParseException();
|
throw new ParseException();
|
||||||
}
|
}
|
||||||
|
@ -343,7 +451,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
jj_consume_token(RANGE_TO);
|
jj_consume_token(RANGE_TO);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
jj_la1[16] = jj_gen;
|
jj_la1[19] = jj_gen;
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
|
@ -354,20 +462,20 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
goop2 = jj_consume_token(RANGE_QUOTED);
|
goop2 = jj_consume_token(RANGE_QUOTED);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
jj_la1[17] = jj_gen;
|
jj_la1[20] = jj_gen;
|
||||||
jj_consume_token(-1);
|
jj_consume_token(-1);
|
||||||
throw new ParseException();
|
throw new ParseException();
|
||||||
}
|
}
|
||||||
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
case RANGEIN_END:
|
case RANGEIN_END:
|
||||||
jj_consume_token(RANGEIN_END);
|
jj_consume_token(RANGEIN_END);
|
||||||
endInc=true;
|
endInc = true;
|
||||||
break;
|
break;
|
||||||
case RANGEEX_END:
|
case RANGEEX_END:
|
||||||
jj_consume_token(RANGEEX_END);
|
jj_consume_token(RANGEEX_END);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
jj_la1[18] = jj_gen;
|
jj_la1[21] = jj_gen;
|
||||||
jj_consume_token(-1);
|
jj_consume_token(-1);
|
||||||
throw new ParseException();
|
throw new ParseException();
|
||||||
}
|
}
|
||||||
|
@ -377,46 +485,71 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
boost = jj_consume_token(NUMBER);
|
boost = jj_consume_token(NUMBER);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
jj_la1[19] = jj_gen;
|
jj_la1[22] = jj_gen;
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
boolean startOpen=false;
|
boolean startOpen=false;
|
||||||
boolean endOpen=false;
|
boolean endOpen=false;
|
||||||
if (goop1.kind == RANGE_QUOTED) {
|
if (goop1.kind == RANGE_QUOTED) {
|
||||||
goop1.image = goop1.image.substring(1, goop1.image.length()-1);
|
goop1.image = goop1.image.substring(1, goop1.image.length()-1);
|
||||||
} else if ("*".equals(goop1.image)) {
|
} else if ("*".equals(goop1.image)) {
|
||||||
startOpen=true;
|
startOpen=true;
|
||||||
}
|
}
|
||||||
if (goop2.kind == RANGE_QUOTED) {
|
if (goop2.kind == RANGE_QUOTED) {
|
||||||
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
|
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
|
||||||
} else if ("*".equals(goop2.image)) {
|
} else if ("*".equals(goop2.image)) {
|
||||||
endOpen=true;
|
endOpen=true;
|
||||||
}
|
}
|
||||||
q = getRangeQuery(getField(field), startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
|
q = getRangeQuery(getField(field),
|
||||||
|
startOpen ? null : discardEscapeChar(goop1.image),
|
||||||
|
endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
|
||||||
break;
|
break;
|
||||||
case QUOTED:
|
case QUOTED:
|
||||||
term = jj_consume_token(QUOTED);
|
term = jj_consume_token(QUOTED);
|
||||||
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
case FUZZY_SLOP:
|
|
||||||
fuzzySlop = jj_consume_token(FUZZY_SLOP);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
jj_la1[20] = jj_gen;
|
|
||||||
;
|
|
||||||
}
|
|
||||||
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
|
||||||
case CARAT:
|
case CARAT:
|
||||||
jj_consume_token(CARAT);
|
case FUZZY_SLOP:
|
||||||
boost = jj_consume_token(NUMBER);
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
|
case CARAT:
|
||||||
|
jj_consume_token(CARAT);
|
||||||
|
boost = jj_consume_token(NUMBER);
|
||||||
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
|
case FUZZY_SLOP:
|
||||||
|
fuzzySlop = jj_consume_token(FUZZY_SLOP);
|
||||||
|
fuzzy=true;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
jj_la1[23] = jj_gen;
|
||||||
|
;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case FUZZY_SLOP:
|
||||||
|
fuzzySlop = jj_consume_token(FUZZY_SLOP);
|
||||||
|
fuzzy=true;
|
||||||
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
|
case CARAT:
|
||||||
|
jj_consume_token(CARAT);
|
||||||
|
boost = jj_consume_token(NUMBER);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
jj_la1[24] = jj_gen;
|
||||||
|
;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
jj_la1[25] = jj_gen;
|
||||||
|
jj_consume_token(-1);
|
||||||
|
throw new ParseException();
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
jj_la1[21] = jj_gen;
|
jj_la1[26] = jj_gen;
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
q = handleQuotedTerm(getField(field), term, fuzzySlop);
|
q = handleQuotedTerm(getField(field), term, fuzzySlop);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
jj_la1[22] = jj_gen;
|
jj_la1[27] = jj_gen;
|
||||||
jj_consume_token(-1);
|
jj_consume_token(-1);
|
||||||
throw new ParseException();
|
throw new ParseException();
|
||||||
}
|
}
|
||||||
|
@ -424,6 +557,44 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
throw new Error("Missing return statement in function");
|
throw new Error("Missing return statement in function");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final public void MultiTerm(String field, List<BooleanClause> clauses) throws ParseException, SyntaxError {
|
||||||
|
Token text;
|
||||||
|
List<String> terms = null;
|
||||||
|
text = jj_consume_token(TERM);
|
||||||
|
if (splitOnWhitespace) {
|
||||||
|
Query q = getFieldQuery(getField(field), discardEscapeChar(text.image), false, true);
|
||||||
|
addClause(clauses, CONJ_NONE, MOD_NONE, q);
|
||||||
|
} else {
|
||||||
|
terms = new ArrayList<String>();
|
||||||
|
terms.add(discardEscapeChar(text.image));
|
||||||
|
}
|
||||||
|
if (getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind)) {
|
||||||
|
|
||||||
|
} else {
|
||||||
|
jj_consume_token(-1);
|
||||||
|
throw new ParseException();
|
||||||
|
}
|
||||||
|
label_2:
|
||||||
|
while (true) {
|
||||||
|
text = jj_consume_token(TERM);
|
||||||
|
if (splitOnWhitespace) {
|
||||||
|
Query q = getFieldQuery(getField(field), discardEscapeChar(text.image), false, true);
|
||||||
|
addClause(clauses, CONJ_NONE, MOD_NONE, q);
|
||||||
|
} else {
|
||||||
|
terms.add(discardEscapeChar(text.image));
|
||||||
|
}
|
||||||
|
if (getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind)) {
|
||||||
|
;
|
||||||
|
} else {
|
||||||
|
break label_2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (splitOnWhitespace == false) {
|
||||||
|
Query q = getFieldQuery(getField(field), terms, true);
|
||||||
|
addMultiTermClause(clauses, q);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private boolean jj_2_1(int xla) {
|
private boolean jj_2_1(int xla) {
|
||||||
jj_la = xla; jj_lastpos = jj_scanpos = token;
|
jj_la = xla; jj_lastpos = jj_scanpos = token;
|
||||||
try { return !jj_3_1(); }
|
try { return !jj_3_1(); }
|
||||||
|
@ -431,28 +602,76 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
finally { jj_save(0, xla); }
|
finally { jj_save(0, xla); }
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean jj_3R_3() {
|
private boolean jj_2_2(int xla) {
|
||||||
if (jj_scan_token(STAR)) return true;
|
jj_la = xla; jj_lastpos = jj_scanpos = token;
|
||||||
if (jj_scan_token(COLON)) return true;
|
try { return !jj_3_2(); }
|
||||||
|
catch(LookaheadSuccess ls) { return true; }
|
||||||
|
finally { jj_save(1, xla); }
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean jj_2_3(int xla) {
|
||||||
|
jj_la = xla; jj_lastpos = jj_scanpos = token;
|
||||||
|
try { return !jj_3_3(); }
|
||||||
|
catch(LookaheadSuccess ls) { return true; }
|
||||||
|
finally { jj_save(2, xla); }
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean jj_3R_7() {
|
||||||
|
if (jj_scan_token(TERM)) return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean jj_3R_2() {
|
private boolean jj_3R_4() {
|
||||||
if (jj_scan_token(TERM)) return true;
|
if (jj_scan_token(TERM)) return true;
|
||||||
if (jj_scan_token(COLON)) return true;
|
if (jj_scan_token(COLON)) return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean jj_3_1() {
|
private boolean jj_3_1() {
|
||||||
|
if (jj_3R_3()) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean jj_3R_6() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean jj_3R_3() {
|
||||||
|
if (jj_scan_token(TERM)) return true;
|
||||||
|
jj_lookingAhead = true;
|
||||||
|
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
|
||||||
|
jj_lookingAhead = false;
|
||||||
|
if (!jj_semLA || jj_3R_6()) return true;
|
||||||
|
Token xsp;
|
||||||
|
if (jj_3R_7()) return true;
|
||||||
|
while (true) {
|
||||||
|
xsp = jj_scanpos;
|
||||||
|
if (jj_3R_7()) { jj_scanpos = xsp; break; }
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean jj_3_3() {
|
||||||
Token xsp;
|
Token xsp;
|
||||||
xsp = jj_scanpos;
|
xsp = jj_scanpos;
|
||||||
if (jj_3R_2()) {
|
if (jj_3R_4()) {
|
||||||
jj_scanpos = xsp;
|
jj_scanpos = xsp;
|
||||||
if (jj_3R_3()) return true;
|
if (jj_3R_5()) return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean jj_3_2() {
|
||||||
|
if (jj_3R_3()) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean jj_3R_5() {
|
||||||
|
if (jj_scan_token(STAR)) return true;
|
||||||
|
if (jj_scan_token(COLON)) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/** Generated Token Manager. */
|
/** Generated Token Manager. */
|
||||||
public QueryParserTokenManager token_source;
|
public QueryParserTokenManager token_source;
|
||||||
/** Current token. */
|
/** Current token. */
|
||||||
|
@ -462,8 +681,11 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
private int jj_ntk;
|
private int jj_ntk;
|
||||||
private Token jj_scanpos, jj_lastpos;
|
private Token jj_scanpos, jj_lastpos;
|
||||||
private int jj_la;
|
private int jj_la;
|
||||||
|
/** Whether we are looking ahead. */
|
||||||
|
private boolean jj_lookingAhead = false;
|
||||||
|
private boolean jj_semLA;
|
||||||
private int jj_gen;
|
private int jj_gen;
|
||||||
final private int[] jj_la1 = new int[23];
|
final private int[] jj_la1 = new int[28];
|
||||||
static private int[] jj_la1_0;
|
static private int[] jj_la1_0;
|
||||||
static private int[] jj_la1_1;
|
static private int[] jj_la1_1;
|
||||||
static {
|
static {
|
||||||
|
@ -471,12 +693,12 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
jj_la1_init_1();
|
jj_la1_init_1();
|
||||||
}
|
}
|
||||||
private static void jj_la1_init_0() {
|
private static void jj_la1_init_0() {
|
||||||
jj_la1_0 = new int[] {0x6000,0x6000,0x38000,0x38000,0xfb4fe000,0x2400000,0x800000,0x800000,0x800000,0xfb4c0000,0x3a440000,0x4000000,0x4000000,0x800000,0xc0000000,0x0,0x0,0x0,0x0,0x800000,0x4000000,0x800000,0xfb440000,};
|
jj_la1_0 = new int[] {0x6000,0x6000,0x38000,0x38000,0xfb4f8000,0xfb4fe000,0xfb4fe000,0x2400000,0x800000,0x800000,0x800000,0xfb4c0000,0x3a440000,0x4000000,0x800000,0x4800000,0x4800000,0xc0000000,0x0,0x0,0x0,0x0,0x800000,0x4000000,0x800000,0x4800000,0x4800000,0xfb440000,};
|
||||||
}
|
}
|
||||||
private static void jj_la1_init_1() {
|
private static void jj_la1_init_1() {
|
||||||
jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x7,0x0,0x0,0x0,0x0,0x7,0x4,0x0,0x0,0x0,0x0,0xc0,0x8,0xc0,0x30,0x0,0x0,0x0,0x4,};
|
jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x7,0x7,0x7,0x0,0x0,0x0,0x0,0x7,0x4,0x0,0x0,0x0,0x0,0x0,0xc0,0x8,0xc0,0x30,0x0,0x0,0x0,0x0,0x0,0x4,};
|
||||||
}
|
}
|
||||||
final private JJCalls[] jj_2_rtns = new JJCalls[1];
|
final private JJCalls[] jj_2_rtns = new JJCalls[3];
|
||||||
private boolean jj_rescan = false;
|
private boolean jj_rescan = false;
|
||||||
private int jj_gc = 0;
|
private int jj_gc = 0;
|
||||||
|
|
||||||
|
@ -486,7 +708,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
token = new Token();
|
token = new Token();
|
||||||
jj_ntk = -1;
|
jj_ntk = -1;
|
||||||
jj_gen = 0;
|
jj_gen = 0;
|
||||||
for (int i = 0; i < 23; i++) jj_la1[i] = -1;
|
for (int i = 0; i < 28; i++) jj_la1[i] = -1;
|
||||||
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
|
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -495,8 +717,9 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
token_source.ReInit(stream);
|
token_source.ReInit(stream);
|
||||||
token = new Token();
|
token = new Token();
|
||||||
jj_ntk = -1;
|
jj_ntk = -1;
|
||||||
|
jj_lookingAhead = false;
|
||||||
jj_gen = 0;
|
jj_gen = 0;
|
||||||
for (int i = 0; i < 23; i++) jj_la1[i] = -1;
|
for (int i = 0; i < 28; i++) jj_la1[i] = -1;
|
||||||
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
|
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -506,7 +729,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
token = new Token();
|
token = new Token();
|
||||||
jj_ntk = -1;
|
jj_ntk = -1;
|
||||||
jj_gen = 0;
|
jj_gen = 0;
|
||||||
for (int i = 0; i < 23; i++) jj_la1[i] = -1;
|
for (int i = 0; i < 28; i++) jj_la1[i] = -1;
|
||||||
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
|
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -516,7 +739,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
token = new Token();
|
token = new Token();
|
||||||
jj_ntk = -1;
|
jj_ntk = -1;
|
||||||
jj_gen = 0;
|
jj_gen = 0;
|
||||||
for (int i = 0; i < 23; i++) jj_la1[i] = -1;
|
for (int i = 0; i < 28; i++) jj_la1[i] = -1;
|
||||||
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
|
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -579,7 +802,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
|
|
||||||
/** Get the specific Token. */
|
/** Get the specific Token. */
|
||||||
final public Token getToken(int index) {
|
final public Token getToken(int index) {
|
||||||
Token t = token;
|
Token t = jj_lookingAhead ? jj_scanpos : token;
|
||||||
for (int i = 0; i < index; i++) {
|
for (int i = 0; i < index; i++) {
|
||||||
if (t.next != null) t = t.next;
|
if (t.next != null) t = t.next;
|
||||||
else t = t.next = token_source.getNextToken();
|
else t = t.next = token_source.getNextToken();
|
||||||
|
@ -633,7 +856,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
la1tokens[jj_kind] = true;
|
la1tokens[jj_kind] = true;
|
||||||
jj_kind = -1;
|
jj_kind = -1;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < 23; i++) {
|
for (int i = 0; i < 28; i++) {
|
||||||
if (jj_la1[i] == jj_gen) {
|
if (jj_la1[i] == jj_gen) {
|
||||||
for (int j = 0; j < 32; j++) {
|
for (int j = 0; j < 32; j++) {
|
||||||
if ((jj_la1_0[i] & (1<<j)) != 0) {
|
if ((jj_la1_0[i] & (1<<j)) != 0) {
|
||||||
|
@ -672,7 +895,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
|
|
||||||
private void jj_rescan_token() {
|
private void jj_rescan_token() {
|
||||||
jj_rescan = true;
|
jj_rescan = true;
|
||||||
for (int i = 0; i < 1; i++) {
|
for (int i = 0; i < 3; i++) {
|
||||||
try {
|
try {
|
||||||
JJCalls p = jj_2_rtns[i];
|
JJCalls p = jj_2_rtns[i];
|
||||||
do {
|
do {
|
||||||
|
@ -680,6 +903,8 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||||
jj_la = p.arg; jj_lastpos = jj_scanpos = p.first;
|
jj_la = p.arg; jj_lastpos = jj_scanpos = p.first;
|
||||||
switch (i) {
|
switch (i) {
|
||||||
case 0: jj_3_1(); break;
|
case 0: jj_3_1(); break;
|
||||||
|
case 1: jj_3_2(); break;
|
||||||
|
case 2: jj_3_3(); break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
p = p.next;
|
p = p.next;
|
||||||
|
|
|
@ -27,18 +27,17 @@ package org.apache.solr.parser;
|
||||||
|
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.document.DateTools;
|
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.TermRangeQuery;
|
|
||||||
import org.apache.lucene.search.TermQuery;
|
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
import org.apache.solr.search.SyntaxError;
|
import org.apache.solr.search.SyntaxError;
|
||||||
import org.apache.solr.search.QParser;
|
import org.apache.solr.search.QParser;
|
||||||
|
import org.apache.solr.search.QueryParserConfigurationException;
|
||||||
|
|
||||||
|
|
||||||
public class QueryParser extends SolrQueryParserBase {
|
public class QueryParser extends SolrQueryParserBase {
|
||||||
|
@ -46,9 +45,44 @@ public class QueryParser extends SolrQueryParserBase {
|
||||||
*/
|
*/
|
||||||
static public enum Operator { OR, AND }
|
static public enum Operator { OR, AND }
|
||||||
|
|
||||||
public QueryParser(Version matchVersion, String defaultField, QParser parser) {
|
/** default split on whitespace behavior */
|
||||||
|
public static final boolean DEFAULT_SPLIT_ON_WHITESPACE = true;
|
||||||
|
|
||||||
|
public QueryParser(String defaultField, QParser parser) {
|
||||||
this(new FastCharStream(new StringReader("")));
|
this(new FastCharStream(new StringReader("")));
|
||||||
init(matchVersion, defaultField, parser);
|
init(defaultField, parser);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @see #setSplitOnWhitespace(boolean)
|
||||||
|
*/
|
||||||
|
public boolean getSplitOnWhitespace() {
|
||||||
|
return splitOnWhitespace;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether query text should be split on whitespace prior to analysis.
|
||||||
|
* Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
|
||||||
|
*/
|
||||||
|
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
|
||||||
|
this.splitOnWhitespace = splitOnWhitespace;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE;
|
||||||
|
private static Set<Integer> disallowedPostMultiTerm
|
||||||
|
= new HashSet<Integer>(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR));
|
||||||
|
private static boolean allowedPostMultiTerm(int tokenKind) {
|
||||||
|
return disallowedPostMultiTerm.contains(tokenKind) == false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
|
||||||
|
boolean quoted, boolean fieldAutoGenPhraseQueries) throws SyntaxError {
|
||||||
|
if ((getAutoGeneratePhraseQueries() || fieldAutoGenPhraseQueries) && splitOnWhitespace == false) {
|
||||||
|
throw new QueryParserConfigurationException
|
||||||
|
("Field '" + field + "': autoGeneratePhraseQueries == true is disallowed when sow/splitOnWhitespace == false");
|
||||||
|
}
|
||||||
|
return super.newFieldQuery(analyzer, field, queryText, quoted, fieldAutoGenPhraseQueries);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,17 +97,15 @@ TOKEN_MGR_DECLS : {
|
||||||
/* ***************** */
|
/* ***************** */
|
||||||
|
|
||||||
<*> TOKEN : {
|
<*> TOKEN : {
|
||||||
<#_NUM_CHAR: ["0"-"9"] >
|
<#_NUM_CHAR: ["0"-"9"] >
|
||||||
// every character that follows a backslash is considered as an escaped character
|
| <#_ESCAPED_CHAR: "\\" ~[] > // every character that follows a backslash is considered as an escaped character
|
||||||
| <#_ESCAPED_CHAR: "\\" ~[] >
|
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
|
||||||
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
|
"[", "]", "\"", "{", "}", "~", "*", "?", "\\", "/" ]
|
||||||
"[", "]", "\"", "{", "}", "~", "*", "?", "\\", "/" ]
|
| <_ESCAPED_CHAR> ) >
|
||||||
| <_ESCAPED_CHAR> ) >
|
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" | "/" | "!") >
|
||||||
| <#_TERM_CHAR: ( <_TERM_START_CHAR>
|
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
|
||||||
| <_ESCAPED_CHAR> | "-" | "+" | "/" | "!") >
|
| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
|
||||||
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
|
| <#_SQUOTED_CHAR: ( ~[ "'", "\\" ] | <_ESCAPED_CHAR> ) >
|
||||||
| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
|
|
||||||
| <#_SQUOTED_CHAR: ( ~[ "'", "\\" ] | <_ESCAPED_CHAR> ) >
|
|
||||||
}
|
}
|
||||||
|
|
||||||
<DEFAULT, COMMENT> SKIP : {
|
<DEFAULT, COMMENT> SKIP : {
|
||||||
|
@ -93,44 +125,43 @@ TOKEN_MGR_DECLS : {
|
||||||
< <_WHITESPACE>>
|
< <_WHITESPACE>>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
<DEFAULT> TOKEN : {
|
<DEFAULT> TOKEN : {
|
||||||
<AND: ("AND" | "&&") >
|
<AND: ("AND" | "&&") >
|
||||||
| <OR: ("OR" | "||") >
|
| <OR: ("OR" | "||") >
|
||||||
| <NOT: ("NOT" | "!") >
|
| <NOT: ("NOT" | "!") >
|
||||||
| <PLUS: "+" >
|
| <PLUS: "+" >
|
||||||
| <MINUS: "-" >
|
| <MINUS: "-" >
|
||||||
| <BAREOPER: ("+"|"-"|"!") <_WHITESPACE> >
|
| <BAREOPER: ("+"|"-"|"!") <_WHITESPACE> >
|
||||||
| <LPAREN: "(" >
|
| <LPAREN: "(" >
|
||||||
| <RPAREN: ")" >
|
| <RPAREN: ")" >
|
||||||
| <COLON: ":" >
|
| <COLON: ":" >
|
||||||
| <STAR: "*" >
|
| <STAR: "*" >
|
||||||
| <CARAT: "^" > : Boost
|
| <CARAT: "^" > : Boost
|
||||||
| <QUOTED: "\"" (<_QUOTED_CHAR>)* "\"">
|
| <QUOTED: "\"" (<_QUOTED_CHAR>)* "\"">
|
||||||
| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
|
| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
|
||||||
| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
|
| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
|
||||||
| <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
|
| <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
|
||||||
| <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
|
| <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
|
||||||
| <REGEXPTERM: "/" (~["*","/"] | "\\/" ) (~[ "/" ] | "\\/" )* "/" >
|
| <REGEXPTERM: "/" (~["*","/"] | "\\/" ) (~[ "/" ] | "\\/" )* "/" >
|
||||||
| <RANGEIN_START: "[" > : Range
|
| <RANGEIN_START: "[" > : Range
|
||||||
| <RANGEEX_START: "{" > : Range
|
| <RANGEEX_START: "{" > : Range
|
||||||
// TODO: consider using token states instead of inlining SQUOTED
|
// TODO: consider using token states instead of inlining SQUOTED
|
||||||
// | <SQUOTED: "'" (<_SQUOTED_CHAR>)* "'">
|
// | <SQUOTED: "'" (<_SQUOTED_CHAR>)* "'">
|
||||||
// | <LPARAMS: ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | <SQUOTED> | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* >
|
// | <LPARAMS: ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | <SQUOTED> | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* >
|
||||||
| <LPARAMS: ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | ("'" (<_SQUOTED_CHAR>)* "'") | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* >
|
| <LPARAMS: ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | ("'" (<_SQUOTED_CHAR>)* "'") | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* >
|
||||||
| <FILTER: "filter(" >
|
| <FILTER: "filter(" >
|
||||||
}
|
}
|
||||||
|
|
||||||
<Boost> TOKEN : {
|
<Boost> TOKEN : {
|
||||||
<NUMBER: ("=")?("-")? (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
|
<NUMBER: ("=")?("-")? (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
|
||||||
}
|
}
|
||||||
|
|
||||||
<Range> TOKEN : {
|
<Range> TOKEN : {
|
||||||
<RANGE_TO: "TO">
|
<RANGE_TO: "TO">
|
||||||
| <RANGEIN_END: "]"> : DEFAULT
|
| <RANGEIN_END: "]"> : DEFAULT
|
||||||
| <RANGEEX_END: "}"> : DEFAULT
|
| <RANGEEX_END: "}"> : DEFAULT
|
||||||
| <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
|
| <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
|
||||||
| <RANGE_GOOP: (~[ " ", "]", "}" ])+ >
|
| <RANGE_GOOP: (~[ " ", "]", "}" ])+ >
|
||||||
}
|
}
|
||||||
|
|
||||||
// * Query ::= ( Clause )*
|
// * Query ::= ( Clause )*
|
||||||
|
@ -160,8 +191,7 @@ int Modifiers() : {
|
||||||
}
|
}
|
||||||
|
|
||||||
// This makes sure that there is no garbage after the query string
|
// This makes sure that there is no garbage after the query string
|
||||||
Query TopLevelQuery(String field) throws SyntaxError :
|
Query TopLevelQuery(String field) throws SyntaxError : {
|
||||||
{
|
|
||||||
Query q;
|
Query q;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
|
@ -174,27 +204,31 @@ Query TopLevelQuery(String field) throws SyntaxError :
|
||||||
Query Query(String field) throws SyntaxError :
|
Query Query(String field) throws SyntaxError :
|
||||||
{
|
{
|
||||||
List<BooleanClause> clauses = new ArrayList<BooleanClause>();
|
List<BooleanClause> clauses = new ArrayList<BooleanClause>();
|
||||||
Query q, firstQuery=null;
|
Query q;
|
||||||
int conj, mods;
|
int conj, mods;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
mods=Modifiers() q=Clause(field)
|
|
||||||
{
|
|
||||||
addClause(clauses, CONJ_NONE, mods, q);
|
|
||||||
if (mods == MOD_NONE)
|
|
||||||
firstQuery=q;
|
|
||||||
}
|
|
||||||
(
|
(
|
||||||
conj=Conjunction() mods=Modifiers() q=Clause(field)
|
LOOKAHEAD(2)
|
||||||
{ addClause(clauses, conj, mods, q); }
|
MultiTerm(field, clauses)
|
||||||
|
| mods=Modifiers() q=Clause(field)
|
||||||
|
{ addClause(clauses, CONJ_NONE, mods, q); }
|
||||||
|
)
|
||||||
|
(
|
||||||
|
LOOKAHEAD(2)
|
||||||
|
MultiTerm(field, clauses)
|
||||||
|
| conj=Conjunction() mods=Modifiers() q=Clause(field)
|
||||||
|
{ addClause(clauses, conj, mods, q); }
|
||||||
)*
|
)*
|
||||||
{
|
{
|
||||||
if (clauses.size() == 1 && firstQuery != null)
|
if (clauses.size() == 1 && clauses.get(0).getOccur() == BooleanClause.Occur.SHOULD) {
|
||||||
|
Query firstQuery = clauses.get(0).getQuery();
|
||||||
|
if ( ! (firstQuery instanceof RawQuery) || ((RawQuery)firstQuery).getTermCount() == 1) {
|
||||||
return rawToNormal(firstQuery);
|
return rawToNormal(firstQuery);
|
||||||
else {
|
|
||||||
return getBooleanQuery(clauses);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return getBooleanQuery(clauses);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Query Clause(String field) throws SyntaxError : {
|
Query Clause(String field) throws SyntaxError : {
|
||||||
|
@ -204,26 +238,22 @@ Query Clause(String field) throws SyntaxError : {
|
||||||
int flags = 0;
|
int flags = 0;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
|
|
||||||
[
|
[
|
||||||
LOOKAHEAD(2)
|
LOOKAHEAD(2)
|
||||||
(
|
(
|
||||||
fieldToken=<TERM> <COLON> {field=discardEscapeChar(fieldToken.image);}
|
fieldToken=<TERM> <COLON> { field = discardEscapeChar(fieldToken.image); }
|
||||||
| <STAR> <COLON> {field="*";}
|
| <STAR> <COLON> { field = "*"; }
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
(
|
(
|
||||||
q=Term(field)
|
q=Term(field)
|
||||||
| <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?
|
| <LPAREN> q=Query(field) <RPAREN> [ <CARAT> boost=<NUMBER> ]
|
||||||
| (<FILTER> { flags=startFilter(); } q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)? { q=getFilter(q); restoreFlags(flags); } )
|
| (<FILTER> { flags=startFilter(); } q=Query(field) <RPAREN> [ <CARAT> boost=<NUMBER> ] { q=getFilter(q); restoreFlags(flags); } )
|
||||||
| (localParams = <LPARAMS> (<CARAT> boost=<NUMBER>)? { q=getLocalParams(field, localParams.image); } )
|
| (localParams = <LPARAMS> [ <CARAT> boost=<NUMBER> ] { q=getLocalParams(field, localParams.image); } )
|
||||||
)
|
)
|
||||||
{ return handleBoost(q, boost); }
|
{ return handleBoost(q, boost); }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Query Term(String field) throws SyntaxError : {
|
Query Term(String field) throws SyntaxError : {
|
||||||
Token term, boost=null, fuzzySlop=null, goop1, goop2;
|
Token term, boost=null, fuzzySlop=null, goop1, goop2;
|
||||||
boolean prefix = false;
|
boolean prefix = false;
|
||||||
|
@ -245,40 +275,78 @@ Query Term(String field) throws SyntaxError : {
|
||||||
| term=<NUMBER>
|
| term=<NUMBER>
|
||||||
| term=<BAREOPER> { term.image = term.image.substring(0,1); }
|
| term=<BAREOPER> { term.image = term.image.substring(0,1); }
|
||||||
)
|
)
|
||||||
[ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
|
[
|
||||||
[ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
|
<CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
|
||||||
|
| fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } [ <CARAT> boost=<NUMBER> ]
|
||||||
|
]
|
||||||
|
{ q = handleBareTokenQuery(getField(field), term, fuzzySlop, prefix, wildcard, fuzzy, regexp); }
|
||||||
|
|
||||||
|
| ( <RANGEIN_START> { startInc = true; } | <RANGEEX_START> )
|
||||||
|
( goop1=<RANGE_GOOP> | goop1=<RANGE_QUOTED> )
|
||||||
|
[ <RANGE_TO> ]
|
||||||
|
( goop2=<RANGE_GOOP> | goop2=<RANGE_QUOTED> )
|
||||||
|
( <RANGEIN_END> { endInc = true; } | <RANGEEX_END> )
|
||||||
|
[ <CARAT> boost=<NUMBER> ]
|
||||||
{
|
{
|
||||||
q = handleBareTokenQuery(getField(field), term, fuzzySlop, prefix, wildcard, fuzzy, regexp);
|
boolean startOpen=false;
|
||||||
}
|
boolean endOpen=false;
|
||||||
| ( ( <RANGEIN_START> {startInc=true;} | <RANGEEX_START> )
|
if (goop1.kind == RANGE_QUOTED) {
|
||||||
( goop1=<RANGE_GOOP>|goop1=<RANGE_QUOTED> )
|
goop1.image = goop1.image.substring(1, goop1.image.length()-1);
|
||||||
[ <RANGE_TO> ]
|
} else if ("*".equals(goop1.image)) {
|
||||||
( goop2=<RANGE_GOOP>|goop2=<RANGE_QUOTED> )
|
startOpen=true;
|
||||||
( <RANGEIN_END> {endInc=true;} | <RANGEEX_END>))
|
|
||||||
[ <CARAT> boost=<NUMBER> ]
|
|
||||||
{
|
|
||||||
boolean startOpen=false;
|
|
||||||
boolean endOpen=false;
|
|
||||||
if (goop1.kind == RANGE_QUOTED) {
|
|
||||||
goop1.image = goop1.image.substring(1, goop1.image.length()-1);
|
|
||||||
} else if ("*".equals(goop1.image)) {
|
|
||||||
startOpen=true;
|
|
||||||
}
|
|
||||||
if (goop2.kind == RANGE_QUOTED) {
|
|
||||||
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
|
|
||||||
} else if ("*".equals(goop2.image)) {
|
|
||||||
endOpen=true;
|
|
||||||
}
|
|
||||||
q = getRangeQuery(getField(field), startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
|
|
||||||
}
|
|
||||||
| term=<QUOTED>
|
|
||||||
[ fuzzySlop=<FUZZY_SLOP> ]
|
|
||||||
[ <CARAT> boost=<NUMBER> ]
|
|
||||||
{
|
|
||||||
q = handleQuotedTerm(getField(field), term, fuzzySlop);
|
|
||||||
}
|
}
|
||||||
|
if (goop2.kind == RANGE_QUOTED) {
|
||||||
|
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
|
||||||
|
} else if ("*".equals(goop2.image)) {
|
||||||
|
endOpen=true;
|
||||||
|
}
|
||||||
|
q = getRangeQuery(getField(field),
|
||||||
|
startOpen ? null : discardEscapeChar(goop1.image),
|
||||||
|
endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
|
||||||
|
}
|
||||||
|
| term=<QUOTED>
|
||||||
|
[
|
||||||
|
<CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
|
||||||
|
| fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } [ <CARAT> boost=<NUMBER> ]
|
||||||
|
]
|
||||||
|
{ q = handleQuotedTerm(getField(field), term, fuzzySlop); }
|
||||||
)
|
)
|
||||||
|
{ return handleBoost(q, boost); }
|
||||||
|
}
|
||||||
|
|
||||||
|
void MultiTerm(String field, List<BooleanClause> clauses) throws SyntaxError : {
|
||||||
|
Token text;
|
||||||
|
List<String> terms = null;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
text=<TERM>
|
||||||
{
|
{
|
||||||
return handleBoost(q, boost);
|
if (splitOnWhitespace) {
|
||||||
|
Query q = getFieldQuery(getField(field), discardEscapeChar(text.image), false, true);
|
||||||
|
addClause(clauses, CONJ_NONE, MOD_NONE, q);
|
||||||
|
} else {
|
||||||
|
terms = new ArrayList<String>();
|
||||||
|
terms.add(discardEscapeChar(text.image));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Both lookaheads are required; the first lookahead vets the first following term and the second lookahead vets the rest
|
||||||
|
LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) })
|
||||||
|
(
|
||||||
|
LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) })
|
||||||
|
text=<TERM>
|
||||||
|
{
|
||||||
|
if (splitOnWhitespace) {
|
||||||
|
Query q = getFieldQuery(getField(field), discardEscapeChar(text.image), false, true);
|
||||||
|
addClause(clauses, CONJ_NONE, MOD_NONE, q);
|
||||||
|
} else {
|
||||||
|
terms.add(discardEscapeChar(text.image));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)+
|
||||||
|
{
|
||||||
|
if (splitOnWhitespace == false) {
|
||||||
|
Query q = getFieldQuery(getField(field), terms, true);
|
||||||
|
addMultiTermClause(clauses, q);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,10 +18,12 @@ package org.apache.solr.parser;
|
||||||
|
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
|
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
|
||||||
|
@ -41,7 +43,6 @@ import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.RegexpQuery;
|
import org.apache.lucene.search.RegexpQuery;
|
||||||
import org.apache.lucene.search.WildcardQuery;
|
import org.apache.lucene.search.WildcardQuery;
|
||||||
import org.apache.lucene.util.QueryBuilder;
|
import org.apache.lucene.util.QueryBuilder;
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
import org.apache.lucene.util.automaton.Automata;
|
import org.apache.lucene.util.automaton.Automata;
|
||||||
import org.apache.lucene.util.automaton.Automaton;
|
import org.apache.lucene.util.automaton.Automaton;
|
||||||
import org.apache.lucene.util.automaton.Operations;
|
import org.apache.lucene.util.automaton.Operations;
|
||||||
|
@ -59,7 +60,7 @@ import org.apache.solr.search.SolrConstantScoreQuery;
|
||||||
import org.apache.solr.search.SyntaxError;
|
import org.apache.solr.search.SyntaxError;
|
||||||
|
|
||||||
/** This class is overridden by QueryParser in QueryParser.jj
|
/** This class is overridden by QueryParser in QueryParser.jj
|
||||||
* and acts to separate the majority of the Java code from the .jj grammar file.
|
* and acts to separate the majority of the Java code from the .jj grammar file.
|
||||||
*/
|
*/
|
||||||
public abstract class SolrQueryParserBase extends QueryBuilder {
|
public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||||
|
|
||||||
|
@ -83,7 +84,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||||
public static final Operator OR_OPERATOR = Operator.OR;
|
public static final Operator OR_OPERATOR = Operator.OR;
|
||||||
|
|
||||||
/** The default operator that parser uses to combine query terms */
|
/** The default operator that parser uses to combine query terms */
|
||||||
Operator operator = OR_OPERATOR;
|
protected Operator operator = OR_OPERATOR;
|
||||||
|
|
||||||
MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_REWRITE;
|
MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_REWRITE;
|
||||||
boolean allowLeadingWildcard = true;
|
boolean allowLeadingWildcard = true;
|
||||||
|
@ -133,16 +134,32 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||||
// internal: A simple raw fielded query
|
// internal: A simple raw fielded query
|
||||||
public static class RawQuery extends Query {
|
public static class RawQuery extends Query {
|
||||||
final SchemaField sfield;
|
final SchemaField sfield;
|
||||||
final String externalVal;
|
private final List<String> externalVals;
|
||||||
|
|
||||||
public RawQuery(SchemaField sfield, String externalVal) {
|
public RawQuery(SchemaField sfield, String externalVal) {
|
||||||
|
this(sfield, Collections.singletonList(externalVal));
|
||||||
|
}
|
||||||
|
|
||||||
|
public RawQuery(SchemaField sfield, List<String> externalVals) {
|
||||||
this.sfield = sfield;
|
this.sfield = sfield;
|
||||||
this.externalVal = externalVal;
|
this.externalVals = externalVals;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getTermCount() {
|
||||||
|
return externalVals.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getExternalVals() {
|
||||||
|
return externalVals;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getJoinedExternalVal() {
|
||||||
|
return externalVals.size() == 1 ? externalVals.get(0) : String.join(" ", externalVals);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString(String field) {
|
public String toString(String field) {
|
||||||
return "RAW(" + field + "," + externalVal + ")";
|
return "RAW(" + field + "," + getJoinedExternalVal() + ")";
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -165,7 +182,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||||
public abstract Query TopLevelQuery(String field) throws ParseException, SyntaxError;
|
public abstract Query TopLevelQuery(String field) throws ParseException, SyntaxError;
|
||||||
|
|
||||||
|
|
||||||
public void init(Version matchVersion, String defaultField, QParser parser) {
|
public void init(String defaultField, QParser parser) {
|
||||||
this.schema = parser.getReq().getSchema();
|
this.schema = parser.getReq().getSchema();
|
||||||
this.parser = parser;
|
this.parser = parser;
|
||||||
this.flags = parser.getFlags();
|
this.flags = parser.getFlags();
|
||||||
|
@ -406,17 +423,30 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||||
throw new RuntimeException("Clause cannot be both required and prohibited");
|
throw new RuntimeException("Clause cannot be both required and prohibited");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called from QueryParser's MultiTerm rule.
|
||||||
|
* Assumption: no conjunction or modifiers (conj == CONJ_NONE and mods == MOD_NONE)
|
||||||
|
*/
|
||||||
|
protected void addMultiTermClause(List<BooleanClause> clauses, Query q) {
|
||||||
|
// We might have been passed a null query; the term might have been
|
||||||
|
// filtered away by the analyzer.
|
||||||
|
if (q == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
clauses.add(newBooleanClause(q, operator == AND_OPERATOR ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD));
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
|
||||||
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted) throws SyntaxError {
|
boolean quoted, boolean fieldAutoGenPhraseQueries) throws SyntaxError {
|
||||||
BooleanClause.Occur occur = operator == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
|
BooleanClause.Occur occur = operator == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
|
||||||
return createFieldQuery(analyzer, occur, field, queryText, quoted || autoGeneratePhraseQueries, phraseSlop);
|
return createFieldQuery(analyzer, occur, field, queryText,
|
||||||
|
quoted || fieldAutoGenPhraseQueries || autoGeneratePhraseQueries, phraseSlop);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Base implementation delegates to {@link #getFieldQuery(String,String,boolean)}.
|
* Base implementation delegates to {@link #getFieldQuery(String,String,boolean,boolean)}.
|
||||||
* This method may be overridden, for example, to return
|
* This method may be overridden, for example, to return
|
||||||
* a SpanNearQuery instead of a PhraseQuery.
|
* a SpanNearQuery instead of a PhraseQuery.
|
||||||
*
|
*
|
||||||
|
@ -440,7 +470,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||||
query = builder.build();
|
query = builder.build();
|
||||||
} else if (query instanceof MultiPhraseQuery) {
|
} else if (query instanceof MultiPhraseQuery) {
|
||||||
MultiPhraseQuery mpq = (MultiPhraseQuery)query;
|
MultiPhraseQuery mpq = (MultiPhraseQuery)query;
|
||||||
|
|
||||||
if (slop != mpq.getSlop()) {
|
if (slop != mpq.getSlop()) {
|
||||||
query = new MultiPhraseQuery.Builder(mpq).setSlop(slop).build();
|
query = new MultiPhraseQuery.Builder(mpq).setSlop(slop).build();
|
||||||
}
|
}
|
||||||
|
@ -492,7 +522,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||||
protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
|
protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
|
||||||
// FuzzyQuery doesn't yet allow constant score rewrite
|
// FuzzyQuery doesn't yet allow constant score rewrite
|
||||||
String text = term.text();
|
String text = term.text();
|
||||||
int numEdits = FuzzyQuery.floatToEdits(minimumSimilarity,
|
int numEdits = FuzzyQuery.floatToEdits(minimumSimilarity,
|
||||||
text.codePointCount(0, text.length()));
|
text.codePointCount(0, text.length()));
|
||||||
return new FuzzyQuery(term,numEdits,prefixLength);
|
return new FuzzyQuery(term,numEdits,prefixLength);
|
||||||
}
|
}
|
||||||
|
@ -536,14 +566,21 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||||
}
|
}
|
||||||
|
|
||||||
SchemaField sfield = null;
|
SchemaField sfield = null;
|
||||||
List<String> fieldValues = null;
|
List<RawQuery> fieldValues = null;
|
||||||
|
|
||||||
|
boolean onlyRawQueries = true;
|
||||||
boolean useTermsQuery = (flags & QParser.FLAG_FILTER)!=0 && clauses.size() > TERMS_QUERY_THRESHOLD;
|
int allRawQueriesTermCount = 0;
|
||||||
int clausesAdded = 0;
|
for (BooleanClause clause : clauses) {
|
||||||
|
if (clause.getQuery() instanceof RawQuery) {
|
||||||
|
allRawQueriesTermCount += ((RawQuery)clause.getQuery()).getTermCount();
|
||||||
|
} else {
|
||||||
|
onlyRawQueries = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
boolean useTermsQuery = (flags & QParser.FLAG_FILTER)!=0 && allRawQueriesTermCount > TERMS_QUERY_THRESHOLD;
|
||||||
|
|
||||||
BooleanQuery.Builder booleanBuilder = newBooleanQuery();
|
BooleanQuery.Builder booleanBuilder = newBooleanQuery();
|
||||||
Map<SchemaField, List<String>> fmap = new HashMap<>();
|
Map<SchemaField, List<RawQuery>> fmap = new HashMap<>();
|
||||||
|
|
||||||
for (BooleanClause clause : clauses) {
|
for (BooleanClause clause : clauses) {
|
||||||
Query subq = clause.getQuery();
|
Query subq = clause.getQuery();
|
||||||
|
@ -563,14 +600,14 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||||
// If this field isn't indexed, or if it is indexed and we want to use TermsQuery, then collect this value.
|
// If this field isn't indexed, or if it is indexed and we want to use TermsQuery, then collect this value.
|
||||||
// We are currently relying on things like PointField not being marked as indexed in order to bypass
|
// We are currently relying on things like PointField not being marked as indexed in order to bypass
|
||||||
// the "useTermQuery" check.
|
// the "useTermQuery" check.
|
||||||
if (fieldValues == null && useTermsQuery || !sfield.indexed()) {
|
if ((fieldValues == null && useTermsQuery) || !sfield.indexed()) {
|
||||||
fieldValues = new ArrayList<>(2);
|
fieldValues = new ArrayList<>(2);
|
||||||
fmap.put(sfield, fieldValues);
|
fmap.put(sfield, fieldValues);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fieldValues != null) {
|
if (fieldValues != null) {
|
||||||
fieldValues.add(rawq.externalVal);
|
fieldValues.add(rawq);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -578,33 +615,50 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
clausesAdded++;
|
|
||||||
booleanBuilder.add(clause);
|
booleanBuilder.add(clause);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
for (Map.Entry<SchemaField,List<String>> entry : fmap.entrySet()) {
|
for (Map.Entry<SchemaField,List<RawQuery>> entry : fmap.entrySet()) {
|
||||||
sfield = entry.getKey();
|
sfield = entry.getKey();
|
||||||
fieldValues = entry.getValue();
|
fieldValues = entry.getValue();
|
||||||
FieldType ft = sfield.getType();
|
FieldType ft = sfield.getType();
|
||||||
|
|
||||||
// TODO: pull more of this logic out to FieldType? We would need to be able to add clauses to our existing booleanBuilder.
|
// TODO: pull more of this logic out to FieldType? We would need to be able to add clauses to our existing booleanBuilder.
|
||||||
if (sfield.indexed() && fieldValues.size() < TERMS_QUERY_THRESHOLD || fieldValues.size() == 1) {
|
int termCount = fieldValues.stream().mapToInt(RawQuery::getTermCount).sum();
|
||||||
|
if ((sfield.indexed() && termCount < TERMS_QUERY_THRESHOLD) || termCount == 1) {
|
||||||
// use boolean query instead
|
// use boolean query instead
|
||||||
for (String externalVal : fieldValues) {
|
for (RawQuery rawq : fieldValues) {
|
||||||
Query subq = ft.getFieldQuery(this.parser, sfield, externalVal);
|
Query subq;
|
||||||
clausesAdded++;
|
if (ft.isTokenized() && sfield.indexed()) {
|
||||||
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
|
boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries();
|
||||||
|
subq = newFieldQuery(getAnalyzer(), sfield.getName(), rawq.getJoinedExternalVal(),
|
||||||
|
false, fieldAutoGenPhraseQueries);
|
||||||
|
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
|
||||||
|
} else {
|
||||||
|
for (String externalVal : rawq.getExternalVals()) {
|
||||||
|
subq = ft.getFieldQuery(this.parser, sfield, externalVal);
|
||||||
|
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
Query subq = ft.getSetQuery(this.parser, sfield, fieldValues);
|
List<String> externalVals
|
||||||
if (fieldValues.size() == clauses.size()) return subq; // if this is everything, don't wrap in a boolean query
|
= fieldValues.stream().flatMap(rawq -> rawq.getExternalVals().stream()).collect(Collectors.toList());
|
||||||
clausesAdded++;
|
Query subq = ft.getSetQuery(this.parser, sfield, externalVals);
|
||||||
|
if (onlyRawQueries && termCount == allRawQueriesTermCount) return subq; // if this is everything, don't wrap in a boolean query
|
||||||
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
|
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return booleanBuilder.build();
|
BooleanQuery bq = booleanBuilder.build();
|
||||||
|
if (bq.clauses().size() == 1) { // Unwrap single SHOULD query
|
||||||
|
BooleanClause clause = bq.clauses().iterator().next();
|
||||||
|
if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
|
||||||
|
return clause.getQuery();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return bq;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -835,9 +889,26 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||||
|
|
||||||
// Create a "normal" query from a RawQuery (or just return the current query if it's not raw)
|
// Create a "normal" query from a RawQuery (or just return the current query if it's not raw)
|
||||||
Query rawToNormal(Query q) {
|
Query rawToNormal(Query q) {
|
||||||
if (!(q instanceof RawQuery)) return q;
|
Query normal = q;
|
||||||
RawQuery rq = (RawQuery)q;
|
if (q instanceof RawQuery) {
|
||||||
return rq.sfield.getType().getFieldQuery(parser, rq.sfield, rq.externalVal);
|
RawQuery rawq = (RawQuery)q;
|
||||||
|
if (rawq.sfield.getType().isTokenized()) {
|
||||||
|
normal = rawq.sfield.getType().getFieldQuery(parser, rawq.sfield, rawq.getJoinedExternalVal());
|
||||||
|
} else {
|
||||||
|
FieldType ft = rawq.sfield.getType();
|
||||||
|
if (rawq.getTermCount() == 1) {
|
||||||
|
normal = ft.getFieldQuery(this.parser, rawq.sfield, rawq.getExternalVals().get(0));
|
||||||
|
} else {
|
||||||
|
BooleanQuery.Builder booleanBuilder = newBooleanQuery();
|
||||||
|
for (String externalVal : rawq.getExternalVals()) {
|
||||||
|
Query subq = ft.getFieldQuery(this.parser, rawq.sfield, externalVal);
|
||||||
|
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
|
||||||
|
}
|
||||||
|
normal = booleanBuilder.build();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return normal;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws SyntaxError {
|
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws SyntaxError {
|
||||||
|
@ -877,21 +948,87 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||||
FieldType ft = sf.getType();
|
FieldType ft = sf.getType();
|
||||||
// delegate to type for everything except tokenized fields
|
// delegate to type for everything except tokenized fields
|
||||||
if (ft.isTokenized() && sf.indexed()) {
|
if (ft.isTokenized() && sf.indexed()) {
|
||||||
return newFieldQuery(getAnalyzer(), field, queryText, quoted || (ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries()));
|
boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries();
|
||||||
|
return newFieldQuery(getAnalyzer(), field, queryText, quoted, fieldAutoGenPhraseQueries);
|
||||||
} else {
|
} else {
|
||||||
if (raw) {
|
if (raw) {
|
||||||
return new RawQuery(sf, queryText);
|
return new RawQuery(sf, queryText);
|
||||||
} else {
|
} else {
|
||||||
return sf.getType().getFieldQuery(parser, sf, queryText);
|
return ft.getFieldQuery(parser, sf, queryText);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// default to a normal field query
|
// default to a normal field query
|
||||||
return newFieldQuery(getAnalyzer(), field, queryText, quoted);
|
return newFieldQuery(getAnalyzer(), field, queryText, quoted, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1){
|
// Assumption: quoted is always false
|
||||||
|
protected Query getFieldQuery(String field, List<String> queryTerms, boolean raw) throws SyntaxError {
|
||||||
|
checkNullField(field);
|
||||||
|
|
||||||
|
SchemaField sf;
|
||||||
|
if (field.equals(lastFieldName)) {
|
||||||
|
// only look up the SchemaField on a field change... this helps with memory allocation of dynamic fields
|
||||||
|
// and large queries like foo_i:(1 2 3 4 5 6 7 8 9 10) when we are passed "foo_i" each time.
|
||||||
|
sf = lastField;
|
||||||
|
} else {
|
||||||
|
// intercept magic field name of "_" to use as a hook for our
|
||||||
|
// own functions.
|
||||||
|
if (field.charAt(0) == '_' && parser != null) {
|
||||||
|
MagicFieldName magic = MagicFieldName.get(field);
|
||||||
|
if (null != magic) {
|
||||||
|
subQParser = parser.subQuery(String.join(" ", queryTerms), magic.subParser);
|
||||||
|
return subQParser.getQuery();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
lastFieldName = field;
|
||||||
|
sf = lastField = schema.getFieldOrNull(field);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sf != null) {
|
||||||
|
FieldType ft = sf.getType();
|
||||||
|
// delegate to type for everything except tokenized fields
|
||||||
|
if (ft.isTokenized() && sf.indexed()) {
|
||||||
|
String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms);
|
||||||
|
boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries();
|
||||||
|
return newFieldQuery(getAnalyzer(), field, queryText, false, fieldAutoGenPhraseQueries);
|
||||||
|
} else {
|
||||||
|
if (raw) {
|
||||||
|
return new RawQuery(sf, queryTerms);
|
||||||
|
} else {
|
||||||
|
if (queryTerms.size() == 1) {
|
||||||
|
return ft.getFieldQuery(parser, sf, queryTerms.get(0));
|
||||||
|
} else {
|
||||||
|
List<Query> subqs = new ArrayList<>();
|
||||||
|
for (String queryTerm : queryTerms) {
|
||||||
|
try {
|
||||||
|
subqs.add(ft.getFieldQuery(parser, sf, queryTerm));
|
||||||
|
} catch (Exception e) { // assumption: raw = false only when called from ExtendedDismaxQueryParser.getQuery()
|
||||||
|
// for edismax: ignore parsing failures
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (subqs.size() == 1) {
|
||||||
|
return subqs.get(0);
|
||||||
|
} else { // delay building boolean query until we must
|
||||||
|
final BooleanClause.Occur occur
|
||||||
|
= operator == AND_OPERATOR ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
|
||||||
|
BooleanQuery.Builder booleanBuilder = newBooleanQuery();
|
||||||
|
subqs.forEach(subq -> booleanBuilder.add(subq, occur));
|
||||||
|
return booleanBuilder.build();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// default to a normal field query
|
||||||
|
String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms);
|
||||||
|
return newFieldQuery(getAnalyzer(), field, queryText, false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1){
|
||||||
checkNullField(field);
|
checkNullField(field);
|
||||||
SchemaField sf = schema.getField(field);
|
SchemaField sf = schema.getField(field);
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
package org.apache.solr.search;
|
package org.apache.solr.search;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
@ -160,6 +161,8 @@ public class ExtendedDismaxQParser extends QParser {
|
||||||
|
|
||||||
// but always for unstructured implicit bqs created by getFieldQuery
|
// but always for unstructured implicit bqs created by getFieldQuery
|
||||||
up.minShouldMatch = config.minShouldMatch;
|
up.minShouldMatch = config.minShouldMatch;
|
||||||
|
|
||||||
|
up.setSplitOnWhitespace(config.splitOnWhitespace);
|
||||||
|
|
||||||
parsedUserQuery = parseOriginalQuery(up, mainUserQuery, clauses, config);
|
parsedUserQuery = parseOriginalQuery(up, mainUserQuery, clauses, config);
|
||||||
|
|
||||||
|
@ -307,6 +310,8 @@ public class ExtendedDismaxQParser extends QParser {
|
||||||
up.setRemoveStopFilter(true);
|
up.setRemoveStopFilter(true);
|
||||||
query = up.parse(mainUserQuery);
|
query = up.parse(mainUserQuery);
|
||||||
}
|
}
|
||||||
|
} catch (QueryParserConfigurationException e) {
|
||||||
|
throw e; // Don't ignore configuration exceptions
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
// ignore failure and reparse later after escaping reserved chars
|
// ignore failure and reparse later after escaping reserved chars
|
||||||
up.exceptions = false;
|
up.exceptions = false;
|
||||||
|
@ -545,6 +550,7 @@ public class ExtendedDismaxQParser extends QParser {
|
||||||
pp.addAlias(IMPOSSIBLE_FIELD_NAME, tiebreaker, getFieldBoosts(fields));
|
pp.addAlias(IMPOSSIBLE_FIELD_NAME, tiebreaker, getFieldBoosts(fields));
|
||||||
pp.setPhraseSlop(slop);
|
pp.setPhraseSlop(slop);
|
||||||
pp.setRemoveStopFilter(true); // remove stop filter and keep stopwords
|
pp.setRemoveStopFilter(true); // remove stop filter and keep stopwords
|
||||||
|
pp.setSplitOnWhitespace(config.splitOnWhitespace);
|
||||||
|
|
||||||
/* :TODO: reevaluate using makeDismax=true vs false...
|
/* :TODO: reevaluate using makeDismax=true vs false...
|
||||||
*
|
*
|
||||||
|
@ -976,6 +982,7 @@ public class ExtendedDismaxQParser extends QParser {
|
||||||
private String field;
|
private String field;
|
||||||
private String val;
|
private String val;
|
||||||
private String val2;
|
private String val2;
|
||||||
|
private List<String> vals;
|
||||||
private boolean bool;
|
private boolean bool;
|
||||||
private boolean bool2;
|
private boolean bool2;
|
||||||
private float flt;
|
private float flt;
|
||||||
|
@ -1036,6 +1043,7 @@ public class ExtendedDismaxQParser extends QParser {
|
||||||
this.type = quoted ? QType.PHRASE : QType.FIELD;
|
this.type = quoted ? QType.PHRASE : QType.FIELD;
|
||||||
this.field = field;
|
this.field = field;
|
||||||
this.val = val;
|
this.val = val;
|
||||||
|
this.vals = null;
|
||||||
this.slop = getPhraseSlop(); // unspecified
|
this.slop = getPhraseSlop(); // unspecified
|
||||||
return getAliasedQuery();
|
return getAliasedQuery();
|
||||||
}
|
}
|
||||||
|
@ -1045,10 +1053,21 @@ public class ExtendedDismaxQParser extends QParser {
|
||||||
this.type = QType.PHRASE;
|
this.type = QType.PHRASE;
|
||||||
this.field = field;
|
this.field = field;
|
||||||
this.val = val;
|
this.val = val;
|
||||||
|
this.vals = null;
|
||||||
this.slop = slop;
|
this.slop = slop;
|
||||||
return getAliasedQuery();
|
return getAliasedQuery();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Query getFieldQuery(String field, List<String> queryTerms, boolean raw) throws SyntaxError {
|
||||||
|
this.type = QType.FIELD;
|
||||||
|
this.field = field;
|
||||||
|
this.val = null;
|
||||||
|
this.vals = queryTerms;
|
||||||
|
this.slop = getPhraseSlop();
|
||||||
|
return getAliasedMultiTermQuery(queryTerms);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Query getPrefixQuery(String field, String val) throws SyntaxError {
|
protected Query getPrefixQuery(String field, String val) throws SyntaxError {
|
||||||
if (val.equals("") && field.equals("*")) {
|
if (val.equals("") && field.equals("*")) {
|
||||||
|
@ -1057,11 +1076,17 @@ public class ExtendedDismaxQParser extends QParser {
|
||||||
this.type = QType.PREFIX;
|
this.type = QType.PREFIX;
|
||||||
this.field = field;
|
this.field = field;
|
||||||
this.val = val;
|
this.val = val;
|
||||||
|
this.vals = null;
|
||||||
return getAliasedQuery();
|
return getAliasedQuery();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted) throws SyntaxError {
|
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
|
||||||
|
boolean quoted, boolean fieldAutoGenPhraseQueries) throws SyntaxError {
|
||||||
|
if ((getAutoGeneratePhraseQueries() || fieldAutoGenPhraseQueries) && getSplitOnWhitespace() == false) {
|
||||||
|
throw new QueryParserConfigurationException
|
||||||
|
("Field '" + field + "': autoGeneratePhraseQueries == true is disallowed when sow/splitOnWhitespace == false");
|
||||||
|
}
|
||||||
Analyzer actualAnalyzer;
|
Analyzer actualAnalyzer;
|
||||||
if (removeStopFilter) {
|
if (removeStopFilter) {
|
||||||
if (nonStopFilterAnalyzerPerField == null) {
|
if (nonStopFilterAnalyzerPerField == null) {
|
||||||
|
@ -1074,7 +1099,7 @@ public class ExtendedDismaxQParser extends QParser {
|
||||||
} else {
|
} else {
|
||||||
actualAnalyzer = parser.getReq().getSchema().getFieldType(field).getQueryAnalyzer();
|
actualAnalyzer = parser.getReq().getSchema().getFieldType(field).getQueryAnalyzer();
|
||||||
}
|
}
|
||||||
return super.newFieldQuery(actualAnalyzer, field, queryText, quoted);
|
return super.newFieldQuery(actualAnalyzer, field, queryText, quoted, fieldAutoGenPhraseQueries);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -1083,6 +1108,7 @@ public class ExtendedDismaxQParser extends QParser {
|
||||||
this.field = field;
|
this.field = field;
|
||||||
this.val = a;
|
this.val = a;
|
||||||
this.val2 = b;
|
this.val2 = b;
|
||||||
|
this.vals = null;
|
||||||
this.bool = startInclusive;
|
this.bool = startInclusive;
|
||||||
this.bool2 = endInclusive;
|
this.bool2 = endInclusive;
|
||||||
return getAliasedQuery();
|
return getAliasedQuery();
|
||||||
|
@ -1100,6 +1126,7 @@ public class ExtendedDismaxQParser extends QParser {
|
||||||
this.type = QType.WILDCARD;
|
this.type = QType.WILDCARD;
|
||||||
this.field = field;
|
this.field = field;
|
||||||
this.val = val;
|
this.val = val;
|
||||||
|
this.vals = null;
|
||||||
return getAliasedQuery();
|
return getAliasedQuery();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1108,6 +1135,7 @@ public class ExtendedDismaxQParser extends QParser {
|
||||||
this.type = QType.FUZZY;
|
this.type = QType.FUZZY;
|
||||||
this.field = field;
|
this.field = field;
|
||||||
this.val = val;
|
this.val = val;
|
||||||
|
this.vals = null;
|
||||||
this.flt = minSimilarity;
|
this.flt = minSimilarity;
|
||||||
return getAliasedQuery();
|
return getAliasedQuery();
|
||||||
}
|
}
|
||||||
|
@ -1157,7 +1185,129 @@ public class ExtendedDismaxQParser extends QParser {
|
||||||
return getQuery();
|
return getQuery();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delegates to the super class unless the field has been specified
|
||||||
|
* as an alias -- in which case we recurse on each of
|
||||||
|
* the aliased fields, and the results are composed into a
|
||||||
|
* DisjunctionMaxQuery. (so yes: aliases which point at other
|
||||||
|
* aliases should work)
|
||||||
|
*/
|
||||||
|
protected Query getAliasedMultiTermQuery(List<String> queryTerms) throws SyntaxError {
|
||||||
|
Alias a = aliases.get(field);
|
||||||
|
this.validateCyclicAliasing(field);
|
||||||
|
if (a != null) {
|
||||||
|
List<Query> lst = getQueries(a);
|
||||||
|
if (lst == null || lst.size() == 0) {
|
||||||
|
return getQuery();
|
||||||
|
}
|
||||||
|
|
||||||
|
// make a DisjunctionMaxQuery in this case too... it will stop
|
||||||
|
// the "mm" processing from making everything required in the case
|
||||||
|
// that the query expanded to multiple clauses.
|
||||||
|
// DisMaxQuery.rewrite() removes itself if there is just a single clause anyway.
|
||||||
|
// if (lst.size()==1) return lst.get(0);
|
||||||
|
if (makeDismax) {
|
||||||
|
if (lst.get(0) instanceof BooleanQuery && allSameQueryStructure(lst)) {
|
||||||
|
BooleanQuery.Builder q = new BooleanQuery.Builder();
|
||||||
|
List<Query> subs = new ArrayList<>(lst.size());
|
||||||
|
for (int c = 0 ; c < ((BooleanQuery)lst.get(0)).clauses().size() ; ++c) {
|
||||||
|
subs.clear();
|
||||||
|
// Make a dismax query for each clause position in the boolean per-field queries.
|
||||||
|
for (int n = 0 ; n < lst.size() ; ++n) {
|
||||||
|
subs.add(((BooleanQuery)lst.get(n)).clauses().get(c).getQuery());
|
||||||
|
}
|
||||||
|
q.add(newBooleanClause(new DisjunctionMaxQuery(subs, a.tie), BooleanClause.Occur.SHOULD));
|
||||||
|
}
|
||||||
|
return q.build();
|
||||||
|
} else {
|
||||||
|
return new DisjunctionMaxQuery(lst, a.tie);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
BooleanQuery.Builder q = new BooleanQuery.Builder();
|
||||||
|
for (Query sub : lst) {
|
||||||
|
q.add(sub, BooleanClause.Occur.SHOULD);
|
||||||
|
}
|
||||||
|
return q.build();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// verify that a fielded query is actually on a field that exists... if not,
|
||||||
|
// then throw an exception to get us out of here, and we'll treat it like a
|
||||||
|
// literal when we try the escape+re-parse.
|
||||||
|
if (exceptions) {
|
||||||
|
FieldType ft = schema.getFieldTypeNoEx(field);
|
||||||
|
if (ft == null && null == MagicFieldName.get(field)) {
|
||||||
|
throw unknownField;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return getQuery();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Recursively examines the given query list for identical structure in all queries. */
|
||||||
|
private boolean allSameQueryStructure(List<Query> lst) {
|
||||||
|
boolean allSame = true;
|
||||||
|
Query firstQuery = lst.get(0);
|
||||||
|
for (int n = 1 ; n < lst.size(); ++n) {
|
||||||
|
Query nthQuery = lst.get(n);
|
||||||
|
if (nthQuery.getClass() != firstQuery.getClass()) {
|
||||||
|
allSame = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (firstQuery instanceof BooleanQuery) {
|
||||||
|
List<BooleanClause> firstBooleanClauses = ((BooleanQuery)firstQuery).clauses();
|
||||||
|
List<BooleanClause> nthBooleanClauses = ((BooleanQuery)nthQuery).clauses();
|
||||||
|
if (firstBooleanClauses.size() != nthBooleanClauses.size()) {
|
||||||
|
allSame = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
for (int c = 0 ; c < firstBooleanClauses.size() ; ++c) {
|
||||||
|
if (nthBooleanClauses.get(c).getQuery().getClass() != firstBooleanClauses.get(c).getQuery().getClass()
|
||||||
|
|| nthBooleanClauses.get(c).getOccur() != firstBooleanClauses.get(c).getOccur()) {
|
||||||
|
allSame = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (firstBooleanClauses.get(c).getQuery() instanceof BooleanQuery && ! allSameQueryStructure
|
||||||
|
(Arrays.asList(firstBooleanClauses.get(c).getQuery(), nthBooleanClauses.get(c).getQuery()))) {
|
||||||
|
allSame = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return allSame;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void addMultiTermClause(List<BooleanClause> clauses, Query q) {
|
||||||
|
// We might have been passed a null query; the terms might have been filtered away by the analyzer.
|
||||||
|
if (q == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean required = operator == AND_OPERATOR;
|
||||||
|
BooleanClause.Occur occur = required ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
|
||||||
|
|
||||||
|
if (q instanceof BooleanQuery) {
|
||||||
|
boolean allOptionalDisMaxQueries = true;
|
||||||
|
for (BooleanClause c : ((BooleanQuery)q).clauses()) {
|
||||||
|
if (c.getOccur() != BooleanClause.Occur.SHOULD || ! (c.getQuery() instanceof DisjunctionMaxQuery)) {
|
||||||
|
allOptionalDisMaxQueries = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (allOptionalDisMaxQueries) {
|
||||||
|
// getAliasedMultiTermQuery() constructed a BooleanQuery containing only SHOULD DisjunctionMaxQuery-s.
|
||||||
|
// Unwrap the query and add a clause for each contained DisMax query.
|
||||||
|
for (BooleanClause c : ((BooleanQuery)q).clauses()) {
|
||||||
|
clauses.add(newBooleanClause(c.getQuery(), occur));
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
clauses.add(newBooleanClause(q, occur));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validate there is no cyclic referencing in the aliasing
|
* Validate there is no cyclic referencing in the aliasing
|
||||||
*/
|
*/
|
||||||
|
@ -1212,7 +1362,12 @@ public class ExtendedDismaxQParser extends QParser {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case FIELD: // fallthrough
|
case FIELD: // fallthrough
|
||||||
case PHRASE:
|
case PHRASE:
|
||||||
Query query = super.getFieldQuery(field, val, type == QType.PHRASE, false);
|
Query query;
|
||||||
|
if (val == null) {
|
||||||
|
query = super.getFieldQuery(field, vals, false);
|
||||||
|
} else {
|
||||||
|
query = super.getFieldQuery(field, val, type == QType.PHRASE, false);
|
||||||
|
}
|
||||||
// Boolean query on a whitespace-separated string
|
// Boolean query on a whitespace-separated string
|
||||||
// If these were synonyms we would have a SynonymQuery
|
// If these were synonyms we would have a SynonymQuery
|
||||||
if (query instanceof BooleanQuery) {
|
if (query instanceof BooleanQuery) {
|
||||||
|
@ -1248,6 +1403,8 @@ public class ExtendedDismaxQParser extends QParser {
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
} catch (QueryParserConfigurationException e) {
|
||||||
|
throw e; // Don't ignore configuration exceptions
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
// an exception here is due to the field query not being compatible with the input text
|
// an exception here is due to the field query not being compatible with the input text
|
||||||
// for example, passing a string to a numeric field.
|
// for example, passing a string to a numeric field.
|
||||||
|
@ -1442,7 +1599,7 @@ public class ExtendedDismaxQParser extends QParser {
|
||||||
*/
|
*/
|
||||||
public class ExtendedDismaxConfiguration {
|
public class ExtendedDismaxConfiguration {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The field names specified by 'qf' that (most) clauses will
|
* The field names specified by 'qf' that (most) clauses will
|
||||||
* be queried against
|
* be queried against
|
||||||
*/
|
*/
|
||||||
|
@ -1478,7 +1635,9 @@ public class ExtendedDismaxQParser extends QParser {
|
||||||
protected boolean lowercaseOperators;
|
protected boolean lowercaseOperators;
|
||||||
|
|
||||||
protected String[] boostFuncs;
|
protected String[] boostFuncs;
|
||||||
|
|
||||||
|
protected boolean splitOnWhitespace;
|
||||||
|
|
||||||
public ExtendedDismaxConfiguration(SolrParams localParams,
|
public ExtendedDismaxConfiguration(SolrParams localParams,
|
||||||
SolrParams params, SolrQueryRequest req) {
|
SolrParams params, SolrQueryRequest req) {
|
||||||
solrParams = SolrParams.wrapDefaults(localParams, params);
|
solrParams = SolrParams.wrapDefaults(localParams, params);
|
||||||
|
@ -1522,6 +1681,8 @@ public class ExtendedDismaxQParser extends QParser {
|
||||||
boostFuncs = solrParams.getParams(DisMaxParams.BF);
|
boostFuncs = solrParams.getParams(DisMaxParams.BF);
|
||||||
|
|
||||||
multBoosts = solrParams.getParams(DMP.MULT_BOOST);
|
multBoosts = solrParams.getParams(DMP.MULT_BOOST);
|
||||||
|
|
||||||
|
splitOnWhitespace = solrParams.getBool(QueryParsing.SPLIT_ON_WHITESPACE, SolrQueryParser.DEFAULT_SPLIT_ON_WHITESPACE);
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.solr.search;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.solr.common.params.CommonParams;
|
import org.apache.solr.common.params.CommonParams;
|
||||||
import org.apache.solr.common.params.SolrParams;
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
import org.apache.solr.common.util.StrUtils;
|
||||||
import org.apache.solr.request.SolrQueryRequest;
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -46,6 +47,8 @@ public class LuceneQParser extends QParser {
|
||||||
lparser.setDefaultOperator
|
lparser.setDefaultOperator
|
||||||
(QueryParsing.getQueryParserDefaultOperator(getReq().getSchema(),
|
(QueryParsing.getQueryParserDefaultOperator(getReq().getSchema(),
|
||||||
getParam(QueryParsing.OP)));
|
getParam(QueryParsing.OP)));
|
||||||
|
lparser.setSplitOnWhitespace(StrUtils.parseBool
|
||||||
|
(getParam(QueryParsing.SPLIT_ON_WHITESPACE), SolrQueryParser.DEFAULT_SPLIT_ON_WHITESPACE));
|
||||||
|
|
||||||
return lparser.parse(qstr);
|
return lparser.parse(qstr);
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,6 +28,8 @@ import java.util.List;
|
||||||
* <br>Other parameters:<ul>
|
* <br>Other parameters:<ul>
|
||||||
* <li>q.op - the default operator "OR" or "AND"</li>
|
* <li>q.op - the default operator "OR" or "AND"</li>
|
||||||
* <li>df - the default field name</li>
|
* <li>df - the default field name</li>
|
||||||
|
* <li>sow - split on whitespace prior to analysis, boolean,
|
||||||
|
* default=<code>{@value org.apache.solr.search.SolrQueryParser#DEFAULT_SPLIT_ON_WHITESPACE}</code></li>
|
||||||
* </ul>
|
* </ul>
|
||||||
* <br>Example: <code>{!lucene q.op=AND df=text sort='price asc'}myfield:foo +bar -baz</code>
|
* <br>Example: <code>{!lucene q.op=AND df=text sort='price asc'}myfield:foo +bar -baz</code>
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.search;
|
||||||
|
|
||||||
|
public class QueryParserConfigurationException extends IllegalArgumentException {
|
||||||
|
public QueryParserConfigurationException(String message) {
|
||||||
|
super(message);
|
||||||
|
}
|
||||||
|
}
|
|
@ -51,6 +51,7 @@ public class QueryParsing {
|
||||||
public static final String F = "f"; // field that a query or command pertains to
|
public static final String F = "f"; // field that a query or command pertains to
|
||||||
public static final String TYPE = "type";// parser for this query or command
|
public static final String TYPE = "type";// parser for this query or command
|
||||||
public static final String DEFTYPE = "defType"; // default parser for any direct subqueries
|
public static final String DEFTYPE = "defType"; // default parser for any direct subqueries
|
||||||
|
public static final String SPLIT_ON_WHITESPACE = "sow"; // Whether to split on whitespace prior to analysis
|
||||||
public static final String LOCALPARAM_START = "{!";
|
public static final String LOCALPARAM_START = "{!";
|
||||||
public static final char LOCALPARAM_END = '}';
|
public static final char LOCALPARAM_END = '}';
|
||||||
// true if the value was specified by the "v" param (i.e. v=myval, or v=$param)
|
// true if the value was specified by the "v" param (i.e. v=myval, or v=$param)
|
||||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.solr.parser.QueryParser;
|
||||||
public class SolrQueryParser extends QueryParser {
|
public class SolrQueryParser extends QueryParser {
|
||||||
|
|
||||||
public SolrQueryParser(QParser parser, String defaultField) {
|
public SolrQueryParser(QParser parser, String defaultField) {
|
||||||
super(parser.getReq().getCore().getSolrConfig().luceneMatchVersion, defaultField, parser);
|
super(defaultField, parser);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
US, U.S., U S, USA, U.S.A., U S A, United States, United States of America
|
|
@ -0,0 +1,50 @@
|
||||||
|
<?xml version="1.0" ?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<schema name="test-multi-word-synonyms" version="1.6">
|
||||||
|
|
||||||
|
<fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
|
||||||
|
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
|
||||||
|
<field name="signatureField" type="string" indexed="true" stored="false"/>
|
||||||
|
|
||||||
|
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
|
||||||
|
<analyzer type="index">
|
||||||
|
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="query">
|
||||||
|
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.SynonymGraphFilterFactory"
|
||||||
|
synonyms="multiword-synonyms.txt"
|
||||||
|
tokenizerFactory="solr.StandardTokenizerFactory"
|
||||||
|
ignoreCase="true"
|
||||||
|
expand="true"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
|
||||||
|
<uniqueKey>id</uniqueKey>
|
||||||
|
|
||||||
|
<field name="_version_" type="long" indexed="false" stored="false" docValues="true"/>
|
||||||
|
|
||||||
|
<field name="text" type="text" indexed="true" stored="true"/>
|
||||||
|
|
||||||
|
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
|
||||||
|
|
||||||
|
</schema>
|
|
@ -29,3 +29,5 @@ Television, Televisions, TV, TVs
|
||||||
# Synonym mappings can be used for spelling correction too
|
# Synonym mappings can be used for spelling correction too
|
||||||
pixima => pixma
|
pixima => pixma
|
||||||
|
|
||||||
|
# multiword synonyms
|
||||||
|
wi fi => wifi
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,100 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.search;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class TestMultiWordSynonyms extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeClass() throws Exception {
|
||||||
|
initCore("solrconfig.xml", "schema-multiword-synonyms.xml");
|
||||||
|
index();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void index() throws Exception {
|
||||||
|
assertU(adoc("id","1", "text","USA Today"));
|
||||||
|
assertU(adoc("id","2", "text","A dynamic US economy"));
|
||||||
|
assertU(adoc("id","3", "text","The United States of America's 50 states"));
|
||||||
|
assertU(adoc("id","4", "text","Party in the U.S.A."));
|
||||||
|
assertU(adoc("id","5", "text","These United States"));
|
||||||
|
|
||||||
|
assertU(adoc("id","6", "text","America United of States"));
|
||||||
|
assertU(adoc("id","7", "text","States United"));
|
||||||
|
|
||||||
|
assertU(commit());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNonPhrase() throws Exception {
|
||||||
|
// Don't split on whitespace (sow=false)
|
||||||
|
for (String q : Arrays.asList("US", "U.S.", "USA", "U.S.A.", "United States", "United States of America")) {
|
||||||
|
for (String defType : Arrays.asList("lucene", "edismax")) {
|
||||||
|
assertJQ(req("q", q,
|
||||||
|
"defType", defType,
|
||||||
|
"df", "text",
|
||||||
|
"sow", "false")
|
||||||
|
, "/response/numFound==7"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Split on whitespace (sow=true)
|
||||||
|
for (String q : Arrays.asList("US", "U.S.", "USA", "U.S.A.")) {
|
||||||
|
for (String defType : Arrays.asList("lucene", "edismax")) {
|
||||||
|
assertJQ(req("q", q,
|
||||||
|
"defType", defType,
|
||||||
|
"df", "text",
|
||||||
|
"sow", "true")
|
||||||
|
, "/response/numFound==7"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (String q : Arrays.asList("United States", "United States of America")) {
|
||||||
|
for (String defType : Arrays.asList("lucene", "edismax")) {
|
||||||
|
assertJQ(req("q", q,
|
||||||
|
"defType", defType,
|
||||||
|
"df", "text",
|
||||||
|
"sow", "true")
|
||||||
|
, "/response/numFound==4"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPhrase() throws Exception {
|
||||||
|
for (String q : Arrays.asList
|
||||||
|
("\"US\"", "\"U.S.\"", "\"USA\"", "\"U.S.A.\"", "\"United States\"", "\"United States of America\"")) {
|
||||||
|
for (String defType : Arrays.asList("lucene", "edismax")) {
|
||||||
|
for (String sow : Arrays.asList("true", "false")) {
|
||||||
|
assertJQ(req("q", q,
|
||||||
|
"defType", defType,
|
||||||
|
"df", "text",
|
||||||
|
"sow", sow)
|
||||||
|
, "/response/numFound==5"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -16,7 +16,12 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.search;
|
package org.apache.solr.search;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
@ -28,12 +33,15 @@ import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.TermInSetQuery;
|
import org.apache.lucene.search.TermInSetQuery;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.solr.SolrTestCaseJ4;
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
import org.apache.solr.common.params.MapSolrParams;
|
||||||
import org.apache.solr.core.SolrInfoMBean;
|
import org.apache.solr.core.SolrInfoMBean;
|
||||||
import org.apache.solr.parser.QueryParser;
|
import org.apache.solr.parser.QueryParser;
|
||||||
import org.apache.solr.query.FilterQuery;
|
import org.apache.solr.query.FilterQuery;
|
||||||
import org.apache.solr.request.SolrQueryRequest;
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
|
import org.apache.solr.schema.TextField;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
import org.noggit.ObjectBuilder;
|
||||||
|
|
||||||
|
|
||||||
public class TestSolrQueryParser extends SolrTestCaseJ4 {
|
public class TestSolrQueryParser extends SolrTestCaseJ4 {
|
||||||
|
@ -57,6 +65,8 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
|
||||||
assertU(adoc("id", "12", "eee_s", "X"));
|
assertU(adoc("id", "12", "eee_s", "X"));
|
||||||
assertU(adoc("id", "13", "eee_s", "'balance'", "rrr_s", "/leading_slash"));
|
assertU(adoc("id", "13", "eee_s", "'balance'", "rrr_s", "/leading_slash"));
|
||||||
|
|
||||||
|
assertU(adoc("id", "20", "syn", "wifi ATM"));
|
||||||
|
|
||||||
assertU(commit());
|
assertU(commit());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -208,86 +218,105 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
|
||||||
QParser qParser;
|
QParser qParser;
|
||||||
Query q,qq;
|
Query q,qq;
|
||||||
|
|
||||||
// relevance query should not be a filter
|
Map<String, String> sowFalseParamsMap = new HashMap<>();
|
||||||
qParser = QParser.getParser("foo_s:(a b c)", req);
|
sowFalseParamsMap.put("sow", "false");
|
||||||
q = qParser.getQuery();
|
Map<String, String> sowTrueParamsMap = new HashMap<>();
|
||||||
assertEquals(3, ((BooleanQuery)q).clauses().size());
|
sowTrueParamsMap.put("sow", "true");
|
||||||
|
List<MapSolrParams> paramMaps = Arrays.asList
|
||||||
|
(new MapSolrParams(Collections.emptyMap()), // no sow param (i.e. the default sow value)
|
||||||
|
new MapSolrParams(sowFalseParamsMap),
|
||||||
|
new MapSolrParams(sowTrueParamsMap));
|
||||||
|
|
||||||
// small filter query should still use BooleanQuery
|
for (MapSolrParams params : paramMaps) {
|
||||||
if (QueryParser.TERMS_QUERY_THRESHOLD > 3) {
|
// relevance query should not be a filter
|
||||||
qParser = QParser.getParser("foo_s:(a b c)", req);
|
qParser = QParser.getParser("foo_s:(a b c)", req);
|
||||||
qParser.setIsFilter(true); // this may change in the future
|
qParser.setParams(params);
|
||||||
q = qParser.getQuery();
|
q = qParser.getQuery();
|
||||||
assertEquals(3, ((BooleanQuery) q).clauses().size());
|
assertEquals(3, ((BooleanQuery) q).clauses().size());
|
||||||
|
|
||||||
|
// small filter query should still use BooleanQuery
|
||||||
|
if (QueryParser.TERMS_QUERY_THRESHOLD > 3) {
|
||||||
|
qParser = QParser.getParser("foo_s:(a b c)", req);
|
||||||
|
qParser.setParams(params);
|
||||||
|
qParser.setIsFilter(true); // this may change in the future
|
||||||
|
q = qParser.getQuery();
|
||||||
|
assertEquals(3, ((BooleanQuery) q).clauses().size());
|
||||||
|
}
|
||||||
|
|
||||||
|
// large relevancy query should use BooleanQuery
|
||||||
|
// TODO: we may decide that string fields shouldn't have relevance in the future... change to a text field w/o a stop filter if so
|
||||||
|
qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req);
|
||||||
|
qParser.setParams(params);
|
||||||
|
q = qParser.getQuery();
|
||||||
|
assertEquals(26, ((BooleanQuery)q).clauses().size());
|
||||||
|
|
||||||
|
// large filter query should use TermsQuery
|
||||||
|
qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req);
|
||||||
|
qParser.setIsFilter(true); // this may change in the future
|
||||||
|
qParser.setParams(params);
|
||||||
|
q = qParser.getQuery();
|
||||||
|
assertEquals(26, ((TermInSetQuery)q).getTermData().size());
|
||||||
|
|
||||||
|
// large numeric filter query should use TermsQuery (for trie fields)
|
||||||
|
qParser = QParser.getParser("foo_ti:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req);
|
||||||
|
qParser.setIsFilter(true); // this may change in the future
|
||||||
|
qParser.setParams(params);
|
||||||
|
q = qParser.getQuery();
|
||||||
|
assertEquals(20, ((TermInSetQuery)q).getTermData().size());
|
||||||
|
|
||||||
|
// for point fields large filter query should use PointInSetQuery
|
||||||
|
qParser = QParser.getParser("foo_pi:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req);
|
||||||
|
qParser.setIsFilter(true); // this may change in the future
|
||||||
|
qParser.setParams(params);
|
||||||
|
q = qParser.getQuery();
|
||||||
|
assertTrue(q instanceof PointInSetQuery);
|
||||||
|
assertEquals(20, ((PointInSetQuery)q).getPackedPoints().size());
|
||||||
|
|
||||||
|
// a filter() clause inside a relevancy query should be able to use a TermsQuery
|
||||||
|
qParser = QParser.getParser("foo_s:aaa filter(foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z))", req);
|
||||||
|
qParser.setParams(params);
|
||||||
|
q = qParser.getQuery();
|
||||||
|
assertEquals(2, ((BooleanQuery)q).clauses().size());
|
||||||
|
qq = ((BooleanQuery)q).clauses().get(0).getQuery();
|
||||||
|
if (qq instanceof TermQuery) {
|
||||||
|
qq = ((BooleanQuery)q).clauses().get(1).getQuery();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (qq instanceof FilterQuery) {
|
||||||
|
qq = ((FilterQuery)qq).getQuery();
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(26, ((TermInSetQuery) qq).getTermData().size());
|
||||||
|
|
||||||
|
// test mixed boolean query, including quotes (which shouldn't matter)
|
||||||
|
qParser = QParser.getParser("foo_s:(a +aaa b -bbb c d e f bar_s:(qqq www) g h i j k l m n o p q r s t u v w x y z)", req);
|
||||||
|
qParser.setIsFilter(true); // this may change in the future
|
||||||
|
qParser.setParams(params);
|
||||||
|
q = qParser.getQuery();
|
||||||
|
assertEquals(4, ((BooleanQuery)q).clauses().size());
|
||||||
|
qq = null;
|
||||||
|
for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
|
||||||
|
qq = clause.getQuery();
|
||||||
|
if (qq instanceof TermInSetQuery) break;
|
||||||
|
}
|
||||||
|
assertEquals(26, ((TermInSetQuery)qq).getTermData().size());
|
||||||
|
|
||||||
|
// test terms queries of two different fields (LUCENE-7637 changed to require all terms be in the same field)
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (int i=0; i<17; i++) {
|
||||||
|
char letter = (char)('a'+i);
|
||||||
|
sb.append("foo_s:" + letter + " bar_s:" + letter + " ");
|
||||||
|
}
|
||||||
|
qParser = QParser.getParser(sb.toString(), req);
|
||||||
|
qParser.setIsFilter(true); // this may change in the future
|
||||||
|
qParser.setParams(params);
|
||||||
|
q = qParser.getQuery();
|
||||||
|
assertEquals(2, ((BooleanQuery)q).clauses().size());
|
||||||
|
for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
|
||||||
|
qq = clause.getQuery();
|
||||||
|
assertEquals(17, ((TermInSetQuery)qq).getTermData().size());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// large relevancy query should use BooleanQuery
|
|
||||||
// TODO: we may decide that string fields shouldn't have relevance in the future... change to a text field w/o a stop filter if so
|
|
||||||
qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req);
|
|
||||||
q = qParser.getQuery();
|
|
||||||
assertEquals(26, ((BooleanQuery)q).clauses().size());
|
|
||||||
|
|
||||||
// large filter query should use TermsQuery
|
|
||||||
qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req);
|
|
||||||
qParser.setIsFilter(true); // this may change in the future
|
|
||||||
q = qParser.getQuery();
|
|
||||||
assertEquals(26, ((TermInSetQuery)q).getTermData().size());
|
|
||||||
|
|
||||||
// large numeric filter query should use TermsQuery (for trie fields)
|
|
||||||
qParser = QParser.getParser("foo_ti:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req);
|
|
||||||
qParser.setIsFilter(true); // this may change in the future
|
|
||||||
q = qParser.getQuery();
|
|
||||||
assertEquals(20, ((TermInSetQuery)q).getTermData().size());
|
|
||||||
|
|
||||||
// for point fields large filter query should use PointInSetQuery
|
|
||||||
qParser = QParser.getParser("foo_pi:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req);
|
|
||||||
qParser.setIsFilter(true); // this may change in the future
|
|
||||||
q = qParser.getQuery();
|
|
||||||
assertTrue(q instanceof PointInSetQuery);
|
|
||||||
assertEquals(20, ((PointInSetQuery)q).getPackedPoints().size());
|
|
||||||
|
|
||||||
// a filter() clause inside a relevancy query should be able to use a TermsQuery
|
|
||||||
qParser = QParser.getParser("foo_s:aaa filter(foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z))", req);
|
|
||||||
q = qParser.getQuery();
|
|
||||||
assertEquals(2, ((BooleanQuery)q).clauses().size());
|
|
||||||
qq = ((BooleanQuery)q).clauses().get(0).getQuery();
|
|
||||||
if (qq instanceof TermQuery) {
|
|
||||||
qq = ((BooleanQuery)q).clauses().get(1).getQuery();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (qq instanceof FilterQuery) {
|
|
||||||
qq = ((FilterQuery)qq).getQuery();
|
|
||||||
}
|
|
||||||
|
|
||||||
assertEquals(26, ((TermInSetQuery)qq).getTermData().size());
|
|
||||||
|
|
||||||
// test mixed boolean query, including quotes (which shouldn't matter)
|
|
||||||
qParser = QParser.getParser("foo_s:(a +aaa b -bbb c d e f bar_s:(qqq www) g h i j k l m n o p q r s t u v w x y z)", req);
|
|
||||||
qParser.setIsFilter(true); // this may change in the future
|
|
||||||
q = qParser.getQuery();
|
|
||||||
assertEquals(4, ((BooleanQuery)q).clauses().size());
|
|
||||||
qq = null;
|
|
||||||
for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
|
|
||||||
qq = clause.getQuery();
|
|
||||||
if (qq instanceof TermInSetQuery) break;
|
|
||||||
}
|
|
||||||
assertEquals(26, ((TermInSetQuery)qq).getTermData().size());
|
|
||||||
|
|
||||||
// test terms queries of two different fields (LUCENE-7637 changed to require all terms be in the same field)
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
for (int i=0; i<17; i++) {
|
|
||||||
char letter = (char)('a'+i);
|
|
||||||
sb.append("foo_s:" + letter + " bar_s:" + letter + " ");
|
|
||||||
}
|
|
||||||
qParser = QParser.getParser(sb.toString(), req);
|
|
||||||
qParser.setIsFilter(true); // this may change in the future
|
|
||||||
q = qParser.getQuery();
|
|
||||||
assertEquals(2, ((BooleanQuery)q).clauses().size());
|
|
||||||
for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
|
|
||||||
qq = clause.getQuery();
|
|
||||||
assertEquals(17, ((TermInSetQuery)qq).getTermData().size());
|
|
||||||
}
|
|
||||||
|
|
||||||
req.close();
|
req.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -306,6 +335,10 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
|
||||||
// This will still fail when used as the main query, but will pass in a filter query since TermsQuery can be used.
|
// This will still fail when used as the main query, but will pass in a filter query since TermsQuery can be used.
|
||||||
assertJQ(req("q","*:*", "fq", q)
|
assertJQ(req("q","*:*", "fq", q)
|
||||||
,"/response/numFound==6");
|
,"/response/numFound==6");
|
||||||
|
assertJQ(req("q","*:*", "fq", q, "sow", "false")
|
||||||
|
,"/response/numFound==6");
|
||||||
|
assertJQ(req("q","*:*", "fq", q, "sow", "true")
|
||||||
|
,"/response/numFound==6");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -540,4 +573,400 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
|
||||||
req.close();
|
req.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LUCENE-7533
|
||||||
|
public void testSplitOnWhitespace_with_autoGeneratePhraseQueries() throws Exception {
|
||||||
|
assertTrue(((TextField)h.getCore().getLatestSchema().getField("text").getType()).getAutoGeneratePhraseQueries());
|
||||||
|
|
||||||
|
try (SolrQueryRequest req = req()) {
|
||||||
|
final QParser qparser = QParser.getParser("{!lucene sow=false qf=text}blah blah", req);
|
||||||
|
expectThrows(QueryParserConfigurationException.class, qparser::getQuery);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSplitOnWhitespace_Basic() throws Exception {
|
||||||
|
// The "syn" field has synonyms loaded from synonyms.txt
|
||||||
|
|
||||||
|
assertJQ(req("df", "syn", "q", "wifi", "sow", "true") // retrieve the single document containing literal "wifi"
|
||||||
|
, "/response/numFound==1"
|
||||||
|
, "/response/docs/[0]/id=='20'"
|
||||||
|
);
|
||||||
|
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi", "sow", "false") // trigger the "wi fi => wifi" synonym
|
||||||
|
, "/response/numFound==1"
|
||||||
|
, "/response/docs/[0]/id=='20'"
|
||||||
|
);
|
||||||
|
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi", "sow", "true")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi") // default sow=true
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
|
||||||
|
assertJQ(req("df", "syn", "q", "{!lucene sow=false}wi fi")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
, "/response/docs/[0]/id=='20'"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "{!lucene sow=true}wi fi")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
|
||||||
|
assertJQ(req("df", "syn", "q", "{!lucene}wi fi") // default sow=true
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSplitOnWhitespace_Comments() throws Exception {
|
||||||
|
// The "syn" field has synonyms loaded from synonyms.txt
|
||||||
|
|
||||||
|
assertJQ(req("df", "syn", "q", "wifi", "sow", "true") // retrieve the single document containing literal "wifi"
|
||||||
|
, "/response/numFound==1"
|
||||||
|
, "/response/docs/[0]/id=='20'"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi", "sow", "false") // trigger the "wi fi => wifi" synonym
|
||||||
|
, "/response/numFound==1"
|
||||||
|
, "/response/docs/[0]/id=='20'"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi /* foo */ fi", "sow", "false") // trigger the "wi fi => wifi" synonym
|
||||||
|
, "/response/numFound==1"
|
||||||
|
, "/response/docs/[0]/id=='20'"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi /* foo */ /* bar */ fi", "sow", "false") // trigger the "wi fi => wifi" synonym
|
||||||
|
, "/response/numFound==1"
|
||||||
|
, "/response/docs/[0]/id=='20'"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", " /* foo */ wi fi /* bar */", "sow", "false") // trigger the "wi fi => wifi" synonym
|
||||||
|
, "/response/numFound==1"
|
||||||
|
, "/response/docs/[0]/id=='20'"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", " /* foo */ wi /* bar */ fi /* baz */", "sow", "false") // trigger the "wi fi => wifi" synonym
|
||||||
|
, "/response/numFound==1"
|
||||||
|
, "/response/docs/[0]/id=='20'"
|
||||||
|
);
|
||||||
|
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi", "sow", "true")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi /* foo */ fi", "sow", "true")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi /* foo */ /* bar */ fi", "sow", "true")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "/* foo */ wi fi /* bar */", "sow", "true")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "/* foo */ wi /* bar */ fi /* baz */", "sow", "true")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi") // default sow=true
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi /* foo */ fi") // default sow=true
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi /* foo */ /* bar */ fi") // default sow=true
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "/* foo */ wi fi /* bar */") // default sow=true
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "/* foo */ wi /* bar */ fi /* baz */") // default sow=true
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
assertJQ(req("df", "syn", "q", "{!lucene sow=false}wi fi")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
, "/response/docs/[0]/id=='20'"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "{!lucene sow=false}wi /* foo */ fi")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
, "/response/docs/[0]/id=='20'"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "{!lucene sow=false}wi /* foo */ /* bar */ fi")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
, "/response/docs/[0]/id=='20'"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "{!lucene sow=false}/* foo */ wi fi /* bar */")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
, "/response/docs/[0]/id=='20'"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "{!lucene sow=false}/* foo */ wi /* bar */ fi /* baz */")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
, "/response/docs/[0]/id=='20'"
|
||||||
|
);
|
||||||
|
|
||||||
|
assertJQ(req("df", "syn", "q", "{!lucene sow=true}wi fi")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "{!lucene sow=true}wi /* foo */ fi")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "{!lucene sow=true}wi /* foo */ /* bar */ fi")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "{!lucene sow=true}/* foo */ wi fi /* bar */")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "{!lucene sow=true}/* foo */ wi /* bar */ fi /* baz */")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
|
||||||
|
assertJQ(req("df", "syn", "q", "{!lucene}wi fi") // default sow=true
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "{!lucene}wi /* foo */ fi") // default sow=true
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "{!lucene}wi /* foo */ /* bar */ fi") // default sow=true
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "{!lucene}/* foo */ wi fi /* bar */") // default sow=true
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "{!lucene}/* foo */ wi /* bar */ fi /* baz */") // default sow=true
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testOperatorsAndMultiWordSynonyms() throws Exception {
|
||||||
|
// The "syn" field has synonyms loaded from synonyms.txt
|
||||||
|
|
||||||
|
assertJQ(req("df", "syn", "q", "wifi", "sow", "true") // retrieve the single document containing literal "wifi"
|
||||||
|
, "/response/numFound==1"
|
||||||
|
, "/response/docs/[0]/id=='20'"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi", "sow", "false") // trigger the "wi fi => wifi" synonym
|
||||||
|
, "/response/numFound==1"
|
||||||
|
, "/response/docs/[0]/id=='20'"
|
||||||
|
);
|
||||||
|
|
||||||
|
assertJQ(req("df", "syn", "q", "+wi fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "-wi fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "!wi fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi* fi", "sow", "false") // matches because wi* matches wifi
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "w? fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi~1 fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi^2 fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi^=2 fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi +fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi -fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi !fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi*", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi?", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi~1", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi^2", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi^=2", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "syn:wi fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi syn:fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "NOT wi fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi NOT fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi AND ATM", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "ATM AND wi fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi && ATM", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "ATM && wi fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "(wi fi) AND ATM", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "ATM AND (wi fi)", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "(wi fi) && ATM", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "ATM && (wi fi)", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi OR NotThereAtAll", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "NotThereAtAll OR wi fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi || NotThereAtAll", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "NotThereAtAll || wi fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "(wi fi) OR NotThereAtAll", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "NotThereAtAll OR (wi fi)", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "(wi fi) || NotThereAtAll", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "NotThereAtAll || (wi fi)", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
|
||||||
|
assertJQ(req("df", "syn", "q", "\"wi\" fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi \"fi\"", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "(wi) fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi (fi)", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "/wi/ fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi /fi/", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "(wi fi)", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "+(wi fi)", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
|
||||||
|
Map all = (Map)ObjectBuilder.fromJSON(h.query(req("q", "*:*", "rows", "0", "wt", "json")));
|
||||||
|
int totalDocs = Integer.parseInt(((Map)all.get("response")).get("numFound").toString());
|
||||||
|
int allDocsExceptOne = totalDocs - 1;
|
||||||
|
|
||||||
|
assertJQ(req("df", "syn", "q", "-(wi fi)", "sow", "false")
|
||||||
|
, "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the syn field
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "!(wi fi)", "sow", "false")
|
||||||
|
, "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the syn field
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "NOT (wi fi)", "sow", "false")
|
||||||
|
, "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the syn field
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "(wi fi)^2", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "(wi fi)^=2", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "syn:(wi fi)", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "+ATM wi fi", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "-ATM wi fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "-NotThereAtAll wi fi", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "!ATM wi fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "!NotThereAtAll wi fi", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "NOT ATM wi fi", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "NOT NotThereAtAll wi fi", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "AT* wi fi", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "AT? wi fi", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "\"ATM\" wi fi", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi +ATM", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi -ATM", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi -NotThereAtAll", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi !ATM", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi !NotThereAtAll", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi NOT ATM", "sow", "false")
|
||||||
|
, "/response/numFound==0"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi NOT NotThereAtAll", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi AT*", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi AT?", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "wi fi \"ATM\"", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "\"wi fi\"~2", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
assertJQ(req("df", "syn", "q", "syn:\"wi fi\"", "sow", "false")
|
||||||
|
, "/response/numFound==1"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue