SOLR-11501: limit query parser switching to thwart hacking

* starting a query with {!...} only works when the default parser is lucene or func.
* edismax now requires uf=_query_ to allow advanced sub-queries.
This commit is contained in:
David Smiley 2017-11-22 08:42:42 -05:00
parent d0d8a75717
commit 9b52571d8c
14 changed files with 160 additions and 118 deletions

View File

@ -45,6 +45,25 @@ Apache UIMA 2.3.1
Apache ZooKeeper 3.4.10
Jetty 9.3.20.v20170531
Upgrade Notes
----------------------
* SOLR-11501: Starting a query string with local-params {!myparser ...} is used to switch the query parser to another,
and is intended for use by Solr system developers, not end users doing searches. To reduce negative side-effects of
unintended hack-ability, we've limited the cases that local-params will be parsed to only contexts in which the
default parser is "lucene" or "func".
So if defType=edismax then q={!myparser ...} won't work. In that example, put the desired query parser into defType.
Another example is if deftype=edismax then hl.q={!myparser ...} won't work for the same reason. In that example,
either put the desired query parser into hl.qparser or set hl.qparser=lucene. Most users won't run into these cases
but some will and must change.
If you must have full backwards compatibility, use luceneMatchVersion=7.1.0 or something earlier. (David Smiley)
* SOLR-11501: The edismax parser by default no longer allows subqueries that specify a Solr parser using either
local-params, or the older _query_ magic field trick. For example
{!prefix f=myfield v=enterp} or _query_:"{!prefix f=myfield v=enterp}" are not supported by default anymore.
If you want to allow power-users to do this, set uf=*,_query_ or some other value that includes _query_.
If you must have full backwards compatibility, use luceneMatchVersion=7.1.0 or something earlier. (David Smiley)
New Features
----------------------
* SOLR-11448: Implement an option in collection commands to wait for final results. (ab)

View File

@ -43,6 +43,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.QueryBuilder;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
@ -96,6 +97,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
boolean autoGeneratePhraseQueries = false;
boolean allowSubQueryParsing = false;
int flags;
protected IndexSchema schema;
@ -189,6 +191,10 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
this.flags = parser.getFlags();
this.defaultField = defaultField;
setAnalyzer(schema.getQueryAnalyzer());
// TODO in 8.0(?) remove this. Prior to 7.2 we defaulted to allowing sub-query parsing by default
if (!parser.getReq().getCore().getSolrConfig().luceneMatchVersion.onOrAfter(Version.LUCENE_7_2_0)) {
setAllowSubQueryParsing(true);
} // otherwise defaults to false
}
// Turn on the "filter" bit and return the previous flags for the caller to save
@ -307,6 +313,22 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
return phraseSlop;
}
/** @see #setAllowLeadingWildcard(boolean) */
public boolean isAllowSubQueryParsing() {
return allowSubQueryParsing;
}
/**
* Set to enable subqueries to be parsed. If now allowed, the default, a {@link SyntaxError}
* will likely be thrown.
* Here is the preferred syntax using local-params:
* <code>{!prefix f=field v=foo}</code>
* and here is the older one, using a magic field name:
* <code>_query_:"{!prefix f=field v=foo}"</code>.
*/
public void setAllowSubQueryParsing(boolean allowSubQueryParsing) {
this.allowSubQueryParsing = allowSubQueryParsing;
}
/**
* Set to <code>true</code> to allow leading wildcard characters.
@ -939,7 +961,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
} else {
// intercept magic field name of "_" to use as a hook for our
// own functions.
if (field.charAt(0) == '_' && parser != null) {
if (allowSubQueryParsing && field.charAt(0) == '_' && parser != null) {
MagicFieldName magic = MagicFieldName.get(field);
if (null != magic) {
subQParser = parser.subQuery(queryText, magic.subParser);
@ -983,7 +1005,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
} else {
// intercept magic field name of "_" to use as a hook for our
// own functions.
if (field.charAt(0) == '_' && parser != null) {
if (allowSubQueryParsing && field.charAt(0) == '_' && parser != null) {
MagicFieldName magic = MagicFieldName.get(field);
if (null != magic) {
subQParser = parser.subQuery(String.join(" ", queryTerms), magic.subParser);
@ -1132,6 +1154,9 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
// called from parser
protected Query getLocalParams(String qfield, String lparams) throws SyntaxError {
if (!allowSubQueryParsing) {
throw new SyntaxError("local-params subquery is disabled");
}
QParser nested = parser.subQuery(lparams, null);
return nested.getQuery();
}

View File

@ -146,6 +146,7 @@ public class ExtendedDismaxQParser extends QParser {
addAliasesFromRequest(up, config.tiebreaker);
up.setPhraseSlop(config.qslop); // slop for explicit user phrase queries
up.setAllowLeadingWildcard(true);
up.setAllowSubQueryParsing(config.userFields.isAllowed(MagicFieldName.QUERY.field));
// defer escaping and only do if lucene parsing fails, or we need phrases
// parsing fails. Need to sloppy phrase queries anyway though.
@ -618,85 +619,7 @@ public class ExtendedDismaxQParser extends QParser {
}
debugInfo.add("boostfuncs", getReq().getParams().getParams(DisMaxParams.BF));
}
// FIXME: Not in use
// public static CharSequence partialEscape(CharSequence s) {
// StringBuilder sb = new StringBuilder();
//
// int len = s.length();
// for (int i = 0; i < len; i++) {
// char c = s.charAt(i);
// if (c == ':') {
// // look forward to make sure it's something that won't
// // cause a parse exception (something that won't be escaped... like
// // +,-,:, whitespace
// if (i+1<len && i>0) {
// char ch = s.charAt(i+1);
// if (!(Character.isWhitespace(ch) || ch=='+' || ch=='-' || ch==':')) {
// // OK, at this point the chars after the ':' will be fine.
// // now look back and try to determine if this is a fieldname
// // [+,-]? [letter,_] [letter digit,_,-,.]*
// // This won't cover *all* possible lucene fieldnames, but we should
// // only pick nice names to begin with
// int start, pos;
// for (start=i-1; start>=0; start--) {
// ch = s.charAt(start);
// if (Character.isWhitespace(ch)) break;
// }
//
// // skip whitespace
// pos = start+1;
//
// // skip leading + or -
// ch = s.charAt(pos);
// if (ch=='+' || ch=='-') {
// pos++;
// }
//
// // we don't need to explicitly check for end of string
// // since ':' will act as our sentinal
//
// // first char can't be '-' or '.'
// ch = s.charAt(pos++);
// if (Character.isJavaIdentifierPart(ch)) {
//
// for(;;) {
// ch = s.charAt(pos++);
// if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) {
// break;
// }
// }
//
// if (pos<=i) {
// // OK, we got to the ':' and everything looked like a valid fieldname, so
// // don't escape the ':'
// sb.append(':');
// continue; // jump back to start of outer-most loop
// }
//
// }
//
//
// }
// }
//
// // we fell through to here, so we should escape this like other reserved chars.
// sb.append('\\');
// }
// else if (c == '\\' || c == '!' || c == '(' || c == ')' ||
// c == '^' || c == '[' || c == ']' ||
// c == '{' || c == '}' || c == '~' || c == '*' || c == '?'
// )
// {
// sb.append('\\');
// }
// sb.append(c);
// }
// return sb;
// }
protected static class Clause {
boolean isBareWord() {
@ -1509,7 +1432,7 @@ public class ExtendedDismaxQParser extends QParser {
private DynamicField[] dynamicUserFields;
private DynamicField[] negativeDynamicUserFields;
UserFields(Map<String,Float> ufm) {
UserFields(Map<String, Float> ufm, boolean forbidSubQueryByDefault) {
userFieldsMap = ufm;
if (0 == userFieldsMap.size()) {
userFieldsMap.put("*", null);
@ -1526,6 +1449,10 @@ public class ExtendedDismaxQParser extends QParser {
dynUserFields.add(new DynamicField(f));
}
}
// unless "_query_" was expressly allowed, we forbid it.
if (forbidSubQueryByDefault && !userFieldsMap.containsKey(MagicFieldName.QUERY.field)) {
userFieldsMap.put("-" + MagicFieldName.QUERY.field, null);
}
Collections.sort(dynUserFields);
dynamicUserFields = dynUserFields.toArray(new DynamicField[dynUserFields.size()]);
Collections.sort(negDynUserFields);
@ -1674,7 +1601,8 @@ public class ExtendedDismaxQParser extends QParser {
SolrParams params, SolrQueryRequest req) {
solrParams = SolrParams.wrapDefaults(localParams, params);
minShouldMatch = DisMaxQParser.parseMinShouldMatch(req.getSchema(), solrParams); // req.getSearcher() here causes searcher refcount imbalance
userFields = new UserFields(U.parseFieldBoosts(solrParams.getParams(DMP.UF)));
final boolean forbidSubQueryByDefault = req.getCore().getSolrConfig().luceneMatchVersion.onOrAfter(Version.LUCENE_7_2_0);
userFields = new UserFields(U.parseFieldBoosts(solrParams.getParams(DMP.UF)), forbidSubQueryByDefault);
try {
queryFields = DisMaxQParser.parseQueryFields(req.getSchema(), solrParams); // req.getSearcher() here causes searcher refcount imbalance
} catch (SyntaxError e) {

View File

@ -177,7 +177,7 @@ public class Grouping {
}
public void addFunctionCommand(String groupByStr, SolrQueryRequest request) throws SyntaxError {
QParser parser = QParser.getParser(groupByStr, "func", request);
QParser parser = QParser.getParser(groupByStr, FunctionQParserPlugin.NAME, request);
Query q = parser.getQuery();
final Grouping.Command gc;
if (q instanceof FunctionQuery) {

View File

@ -44,6 +44,7 @@ public class LuceneQParser extends QParser {
lparser.setDefaultOperator(QueryParsing.parseOP(getParam(QueryParsing.OP)));
lparser.setSplitOnWhitespace(StrUtils.parseBool
(getParam(QueryParsing.SPLIT_ON_WHITESPACE), SolrQueryParser.DEFAULT_SPLIT_ON_WHITESPACE));
lparser.setAllowSubQueryParsing(true);
return lparser.parse(qstr);
}

View File

@ -18,6 +18,7 @@ package org.apache.solr.search;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
@ -36,6 +37,10 @@ public class NestedQParserPlugin extends QParserPlugin {
@Override
public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
if (localParams == null) { // avoid an NPE later
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"the 'query' QParser must be invoked with local-params, e.g. {!query defType=...}");
}
return new QParser(qstr, localParams, params, req) {
QParser baseParser;
ValueSource vs;

View File

@ -161,8 +161,9 @@ public abstract class QParser {
/**
* Returns the resulting query from this QParser, calling parse() only the
* first time and caching the Query result.
* first time and caching the Query result. <em>A null return is possible!</em>
*/
//TODO never return null; standardize the semantics
public Query getQuery() throws SyntaxError {
if (query==null) {
query=parse();
@ -292,9 +293,10 @@ public abstract class QParser {
debugInfo.add("QParser", this.getClass().getSimpleName());
}
/** Create a <code>QParser</code> to parse <code>qstr</code>,
/**
* Create a {@link QParser} to parse <code>qstr</code>,
* using the "lucene" (QParserPlugin.DEFAULT_QTYPE) query parser.
* The query parser may be overridden by local parameters in the query
* The query parser may be overridden by local-params in the query
* string itself. For example if
* qstr=<code>{!prefix f=myfield}foo</code>
* then the prefix query parser will be used.
@ -303,25 +305,41 @@ public abstract class QParser {
return getParser(qstr, QParserPlugin.DEFAULT_QTYPE, req);
}
/** Create a <code>QParser</code> to parse <code>qstr</code>,
* assuming that the default query parser is <code>defaultParser</code>.
* The query parser may be overridden by local parameters in the query
* string itself. For example if defaultParser=<code>"dismax"</code>
* and qstr=<code>foo</code>, then the dismax query parser will be used
* to parse and construct the query object. However
* if qstr=<code>{!prefix f=myfield}foo</code>
* then the prefix query parser will be used.
/**
* Create a {@link QParser} to parse <code>qstr</code> using the <code>defaultParser</code>.
* Note that local-params is only parsed when the defaultParser is "lucene" or "func".
*/
public static QParser getParser(String qstr, String defaultParser, SolrQueryRequest req) throws SyntaxError {
// SolrParams localParams = QueryParsing.getLocalParams(qstr, req.getParams());
boolean allowLocalParams = defaultParser == null || defaultParser.equals(QParserPlugin.DEFAULT_QTYPE)
|| defaultParser.equals(FunctionQParserPlugin.NAME);
return getParser(qstr, defaultParser, allowLocalParams, req);
}
/**
* Expert: Create a {@link QParser} to parse {@code qstr} using the {@code parserName} parser, while allowing a
* toggle for whether local-params may be parsed.
* The query parser may be overridden by local parameters in the query string itself
* (assuming {@code allowLocalParams}.
* For example if parserName=<code>dismax</code> and qstr=<code>foo</code>,
* then the dismax query parser will be used to parse and construct the query object.
* However if qstr=<code>{!prefix f=myfield}foo</code> then the prefix query parser will be used.
*
* @param allowLocalParams Whether this query parser should parse local-params syntax.
* Note that the "lucene" query parser natively parses local-params regardless.
* @lucene.internal
*/
public static QParser getParser(String qstr, String parserName, boolean allowLocalParams, SolrQueryRequest req) throws SyntaxError {
// SolrParams localParams = QueryParsing.getLocalParams(qstr, req.getParams());
if (parserName == null) {
parserName = QParserPlugin.DEFAULT_QTYPE;//"lucene"
}
String stringIncludingLocalParams = qstr;
ModifiableSolrParams localParams = null;
SolrParams globalParams = req.getParams();
boolean valFollowedParams = true;
int localParamsEnd = -1;
if (qstr != null && qstr.startsWith(QueryParsing.LOCALPARAM_START)) {
if (allowLocalParams && qstr != null && qstr.startsWith(QueryParsing.LOCALPARAM_START)) {
localParams = new ModifiableSolrParams();
localParamsEnd = QueryParsing.parseLocalParams(qstr, 0, localParams, globalParams);
@ -329,26 +347,18 @@ public abstract class QParser {
if (val != null) {
// val was directly specified in localParams via v=<something> or v=$arg
valFollowedParams = false;
//TODO if remainder of query string after '}' is non-empty, then what? Throw error? Fall back to lucene QParser?
} else {
// use the remainder of the string as the value
valFollowedParams = true;
val = qstr.substring(localParamsEnd);
localParams.set(QueryParsing.V, val);
}
}
String parserName;
if (localParams == null) {
parserName = defaultParser;
} else {
parserName = localParams.get(QueryParsing.TYPE,defaultParser);
parserName = localParams.get(QueryParsing.TYPE,parserName);
qstr = localParams.get("v");
}
parserName = parserName==null ? QParserPlugin.DEFAULT_QTYPE : parserName;
QParserPlugin qplug = req.getCore().getQueryPlugin(parserName);
QParser parser = qplug.createParser(qstr, localParams, req.getParams(), req);

View File

@ -162,8 +162,8 @@ public class ChildFieldValueSourceParser extends ValueSourceParser {
final Query query;
if (fp.hasMoreArguments()){
query = fp.parseNestedQuery();
}else{
query = fp.subQuery(fp.getParam(CommonParams.Q), BlockJoinParentQParserPlugin.NAME).getQuery();
} else {
query = fp.subQuery(fp.getParam(CommonParams.Q), null).getQuery();
}
BitSetProducer parentFilter;

View File

@ -169,6 +169,26 @@ public class DisMaxRequestHandlerTest extends SolrTestCaseJ4 {
"q", "\"cool chick\"" )
,"//*[@numFound='1']"
);
}
@Test
public void testSubQueriesNotSupported() {
// See org.apache.solr.search.TestSolrQueryParser.testNestedQueryModifiers()
assertQ("don't parse subqueries",
req("defType", "dismax",
"df", "doesnotexist_s",
"q", "_query_:\"{!v=$qq}\"",
"qq", "features_t:cool")
,"//*[@numFound='0']"
);
assertQ("don't parse subqueries",
req("defType", "dismax",
"df", "doesnotexist_s",
"q", "{!v=$qq}",
"qq", "features_t:cool")
,"//*[@numFound='0']"
);
}
@Test

View File

@ -1007,7 +1007,7 @@ public class HighlighterTest extends SolrTestCaseJ4 {
"//lst[@name='highlighting']/lst[@name='1']" +
"/arr[@name='title']/str='Apache Software <em>Foundation</em>'");
assertQ("hl.q parameter uses localparam parser definition correctly",
req("q", "Apache", "defType", "edismax", "qf", "title t_text", "hl", "true", "hl.fl", "title", "hl.q", "{!edismax}Software"),
req("q", "Apache", "defType", "edismax", "qf", "title t_text", "hl", "true", "hl.fl", "title", "hl.q", "{!edismax}Software", "hl.qparser", "lucene"),
"//lst[@name='highlighting']/lst[@name='1']" +
"/arr[@name='title']/str='Apache <em>Software</em> Foundation'");
assertQ("hl.q parameter uses defType correctly",

View File

@ -1049,7 +1049,7 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
SolrQueryResponse rsp = new SolrQueryResponse();
SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req,rsp));
for (int i = 0; i < inputs.length; i++) {
queries[i] = (QParser.getParser(inputs[i], defType, req).getQuery());
queries[i] = QParser.getParser(inputs[i], defType, true, req).getQuery();
}
} finally {
SolrRequestInfo.clearRequestInfo();

View File

@ -16,6 +16,7 @@
*/
package org.apache.solr.search;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.util.AbstractSolrTestCase;
@ -164,6 +165,15 @@ public class TestComplexPhraseQParserPlugin extends AbstractSolrTestCase {
"//result[@numFound='2']"
);
assertQEx("don't parse subqueries",
"SyntaxError",
sumLRF.makeRequest("_query_:\"{!prefix f=name v=smi}\""), SolrException.ErrorCode.BAD_REQUEST
);
assertQEx("don't parse subqueries",
"SyntaxError",
sumLRF.makeRequest("{!prefix f=name v=smi}"), SolrException.ErrorCode.BAD_REQUEST
);
}
@Test

View File

@ -51,7 +51,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
index();
}
public static void index() throws Exception {
public static void index() throws Exception {
assertU(adoc("id", "42", "trait_ss", "Tool", "trait_ss", "Obnoxious",
"name", "Zapp Brannigan"));
assertU(adoc("id", "43" ,
@ -393,13 +393,22 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
// special psuedo-fields like _query_ and _val_
// special fields (and real field id) should be included by default
// _query_ should be excluded by default
assertQ(req("defType", "edismax",
"mm", "100%",
"fq", "id:51",
"q", "_query_:\"{!geofilt d=20 sfield=store pt=12.34,-56.78}\""),
oner);
// should also work when explicitly allowed
"q", "_query_:\"{!geofilt d=20 sfield=store pt=12.34,-56.78}\"",
"debugQuery", "true"),
nor,
"//str[@name='parsedquery_toString'][.='+(((text:queri) (text:\"geofilt d 20 sfield store pt 12 34 56 78\"))~2)']");
// again; this time use embedded local-params style
assertQ(req("defType", "edismax",
"mm", "100%",
"fq", "id:51",
"q", " {!geofilt d=20 sfield=store pt=12.34,-56.78}"),//notice leading space
nor);
// should work when explicitly allowed
assertQ(req("defType", "edismax",
"mm", "100%",
"fq", "id:51",
@ -413,6 +422,14 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
"uf", "_query_",
"q", "_query_:\"{!geofilt d=20 sfield=store pt=12.34,-56.78}\""),
oner);
// again; this time use embedded local-params style
assertQ(req("defType", "edismax",
"mm", "100%",
"fq", "id:51",
"uf", "id",
"uf", "_query_",
"q", " {!geofilt d=20 sfield=store pt=12.34,-56.78}"),//notice leading space
oner);
// should fail when prohibited
assertQ(req("defType", "edismax",
@ -424,7 +441,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
assertQ(req("defType", "edismax",
"mm", "100%",
"fq", "id:51",
"uf", "id", // excluded by ommision
"uf", "id", // excluded by omission
"q", "_query_:\"{!geofilt d=20 sfield=store pt=12.34,-56.78}\""),
nor);
@ -2046,4 +2063,11 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
, "/response/numFound==1"
);
}
/** SOLR-11512 */
@Test(expected=SolrException.class)
public void killInfiniteRecursionParse() throws Exception {
assertJQ(req("defType", "edismax", "q", "*", "qq", "{!edismax v=something}", "bq", "{!edismax v=$qq}"));
}
}

View File

@ -430,7 +430,7 @@ public class TestQueryTypes extends AbstractSolrTestCase {
);
assertQ("test nested nested query",
req("q","_query_:\"{!query defType=query v=$q1}\"", "q1","{!v=$q2}","q2","{!prefix f=v_t v=$qqq}","qqq","hel")
req("q","_query_:\"{!query v=$q1}\"", "q1","{!v=$q2}","q2","{!prefix f=v_t v=$qqq}","qqq","hel")
,"//result[@numFound='2']"
);
assertQ("Test text field with no analysis doesn't NPE with wildcards (SOLR-4318)",