SOLR-2409: edismax: treat invalid fieldnames as a literal

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1094014 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2011-04-16 16:51:11 +00:00
parent a336d4b940
commit b2530b74aa
3 changed files with 87 additions and 2 deletions

View File

@ -266,6 +266,10 @@ Bug Fixes
of the configured maxRetries, due to HttpClient having it's own retry mechanism
by default. The retryCount of HttpClient is now set to 0, and SolrJ does
the retry. (yonik)
* SOLR-2409: edismax parser - treat the text of a fielded query as a literal if the
fieldname does not exist. For example Mission: Impossible should not search on
the "Mission" field unless it's a valid field in the schema. (Ryan McKinley, yonik)

View File

@ -238,6 +238,7 @@ class ExtendedDismaxQParser extends QParser {
try {
up.setRemoveStopFilter(!stopwords);
up.exceptions = true;
parsedUserQuery = up.parse(mainUserQuery);
if (stopwords && isEmpty(parsedUserQuery)) {
@ -247,6 +248,7 @@ class ExtendedDismaxQParser extends QParser {
}
} catch (Exception e) {
// ignore failure and reparse later after escaping reserved chars
up.exceptions = false;
}
if (parsedUserQuery != null && doMinMatched) {
@ -785,12 +787,19 @@ class ExtendedDismaxQParser extends QParser {
RANGE
}
static final RuntimeException unknownField = new RuntimeException("UnknownField");
static {
unknownField.fillInStackTrace();
}
/**
* A subclass of SolrQueryParser that supports aliasing fields for
* constructing DisjunctionMaxQueries.
*/
class ExtendedSolrQueryParser extends SolrQueryParser {
/** A simple container for storing alias info
*/
protected class Alias {
@ -803,6 +812,7 @@ class ExtendedDismaxQParser extends QParser {
boolean allowWildcard=true;
int minClauseSize = 0; // minimum number of clauses per phrase query...
// used when constructing boosting part of query via sloppy phrases
boolean exceptions; // allow exceptions to be thrown (for example on a missing field)
ExtendedAnalyzer analyzer;
@ -982,6 +992,15 @@ class ExtendedDismaxQParser extends QParser {
return q;
}
} else {
// verify that a fielded query is actually on a field that exists... if not,
// then throw an exception to get us out of here, and we'll treat it like a
// literal when we try the escape+re-parse.
if (exceptions) {
FieldType ft = schema.getFieldTypeNoEx(field);
if (ft == null) throw unknownField;
}
return getQuery();
}
}

View File

@ -49,8 +49,13 @@ public class TestExtendedDismaxParser extends AbstractSolrTestCase {
"name", "The Zapper"));
assertU(adoc("id", "45", "trait_ss", "Chauvinist",
"title", "25 star General"));
assertU(adoc("id", "46", "trait_ss", "Obnoxious",
"subject", "Defeated the pacifists op the Gandhi nebula"));
assertU(adoc("id", "46",
"trait_ss", "Obnoxious",
"subject", "Defeated the pacifists op the Gandhi nebula",
"t_special", "literal:colon value",
"movies_t", "first is Mission: Impossible, second is Terminator 2: Judgement Day. Terminator:3 ok...",
"foo_i", "8"
));
assertU(adoc("id", "47", "trait_ss", "Pig",
"text", "line up and fly directly at the enemy death cannons, clogging them with wreckage!"));
assertU(adoc("id", "48", "text_sw", "this has gigabyte potential", "foo_i","100"));
@ -64,6 +69,11 @@ public class TestExtendedDismaxParser extends AbstractSolrTestCase {
String twor = "*[count(//doc)=2]";
String nor = "*[count(//doc)=0]";
assertQ("expected doc is missing (using un-escaped edismax w/qf)",
req("q", "literal:colon",
"qf", "t_special",
"defType", "edismax"),
"//doc[1]/str[@name='id'][.='46']");
assertQ("standard request handler returns all matches",
req(allq),
@ -164,6 +174,58 @@ public class TestExtendedDismaxParser extends AbstractSolrTestCase {
"q","the big"), oner
);
// searching for a literal colon value when clearly not used for a field
assertQ("expected doc is missing (using standard)",
req("q", "t_special:literal\\:colon"),
"//doc[1]/str[@name='id'][.='46']");
assertQ("expected doc is missing (using escaped edismax w/field)",
req("q", "t_special:literal\\:colon",
"defType", "edismax"),
"//doc[1]/str[@name='id'][.='46']");
assertQ("expected doc is missing (using un-escaped edismax w/field)",
req("q", "t_special:literal:colon",
"defType", "edismax"),
"//doc[1]/str[@name='id'][.='46']");
assertQ("expected doc is missing (using escaped edismax w/qf)",
req("q", "literal\\:colon",
"qf", "t_special",
"defType", "edismax"),
"//doc[1]/str[@name='id'][.='46']");
assertQ("expected doc is missing (using un-escaped edismax w/qf)",
req("q", "literal:colon",
"qf", "t_special",
"defType", "edismax"),
"//doc[1]/str[@name='id'][.='46']");
assertQ(req("defType","edismax", "mm","100%", "q","terminator:3", "qf","movies_t"),
oner);
assertQ(req("defType","edismax", "mm","100%", "q","Mission:Impossible", "qf","movies_t"),
oner);
assertQ(req("defType","edismax", "mm","100%", "q","Mission : Impossible", "qf","movies_t"),
oner);
assertQ(req("defType","edismax", "mm","100%", "q","Mission: Impossible", "qf","movies_t"),
oner);
assertQ(req("defType","edismax", "mm","100%", "q","Terminator 2: Judgement Day", "qf","movies_t"),
oner);
// make sure the clause wasn't eliminated
assertQ(req("defType","edismax", "mm","100%", "q","Terminator 10: Judgement Day", "qf","movies_t"),
nor);
// throw in a numeric field
assertQ(req("defType","edismax", "mm","0", "q","Terminator: 100", "qf","movies_t foo_i"),
twor);
assertQ(req("defType","edismax", "mm","100%", "q","Terminator: 100", "qf","movies_t foo_i"),
nor);
assertQ(req("defType","edismax", "mm","100%", "q","Terminator: 8", "qf","movies_t foo_i"),
oner);
assertQ(req("defType","edismax", "mm","0", "q","movies_t:Terminator 100", "qf","movies_t foo_i"),
twor);
/** stopword removal in conjunction with multi-word synonyms at query time
* break this test.
// multi-word synonyms