SOLR-2409: edismax: treat invalid fieldnames as a literal

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1094014 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2011-04-16 16:51:11 +00:00
parent a336d4b940
commit b2530b74aa
3 changed files with 87 additions and 2 deletions

View File

@ -267,6 +267,10 @@ Bug Fixes
by default. The retryCount of HttpClient is now set to 0, and SolrJ does
the retry. (yonik)
* SOLR-2409: edismax parser - treat the text of a fielded query as a literal if the
field name does not exist. For example, the query Mission: Impossible should not search
on the "Mission" field unless it is a valid field in the schema. (Ryan McKinley, yonik)
Other Changes

View File

@ -238,6 +238,7 @@ class ExtendedDismaxQParser extends QParser {
try {
up.setRemoveStopFilter(!stopwords);
up.exceptions = true;
parsedUserQuery = up.parse(mainUserQuery);
if (stopwords && isEmpty(parsedUserQuery)) {
@ -247,6 +248,7 @@ class ExtendedDismaxQParser extends QParser {
}
} catch (Exception e) {
// ignore failure and reparse later after escaping reserved chars
up.exceptions = false;
}
if (parsedUserQuery != null && doMinMatched) {
@ -785,12 +787,19 @@ class ExtendedDismaxQParser extends QParser {
RANGE
}
// Single shared exception instance, thrown by the query parser when a fielded query
// names a field that is not in the schema (and the parser's "exceptions" flag is set).
// It is used to abort the current parse so the caller can fall back to the
// escape + re-parse path, which treats the text as a literal.
static final RuntimeException unknownField = new RuntimeException("UnknownField");
static {
// NOTE(review): the Throwable constructor already records a stack trace, so this call
// only re-records it at class-initialization time. Because the same instance is
// reused for every throw, the recorded trace does not reflect the actual throw site.
unknownField.fillInStackTrace();
}
/**
* A subclass of SolrQueryParser that supports aliasing fields for
* constructing DisjunctionMaxQueries.
*/
class ExtendedSolrQueryParser extends SolrQueryParser {
/** A simple container for storing alias info
*/
protected class Alias {
@ -803,6 +812,7 @@ class ExtendedDismaxQParser extends QParser {
boolean allowWildcard=true;
int minClauseSize = 0; // minimum number of clauses per phrase query...
// used when constructing boosting part of query via sloppy phrases
boolean exceptions; // allow exceptions to be thrown (for example on a missing field)
ExtendedAnalyzer analyzer;
@ -982,6 +992,15 @@ class ExtendedDismaxQParser extends QParser {
return q;
}
} else {
// verify that a fielded query is actually on a field that exists... if not,
// then throw an exception to get us out of here, and we'll treat it like a
// literal when we try the escape+re-parse.
if (exceptions) {
FieldType ft = schema.getFieldTypeNoEx(field);
if (ft == null) throw unknownField;
}
return getQuery();
}
}

View File

@ -49,8 +49,13 @@ public class TestExtendedDismaxParser extends AbstractSolrTestCase {
"name", "The Zapper"));
assertU(adoc("id", "45", "trait_ss", "Chauvinist",
"title", "25 star General"));
assertU(adoc("id", "46", "trait_ss", "Obnoxious",
"subject", "Defeated the pacifists op the Gandhi nebula"));
assertU(adoc("id", "46",
"trait_ss", "Obnoxious",
"subject", "Defeated the pacifists op the Gandhi nebula",
"t_special", "literal:colon value",
"movies_t", "first is Mission: Impossible, second is Terminator 2: Judgement Day. Terminator:3 ok...",
"foo_i", "8"
));
assertU(adoc("id", "47", "trait_ss", "Pig",
"text", "line up and fly directly at the enemy death cannons, clogging them with wreckage!"));
assertU(adoc("id", "48", "text_sw", "this has gigabyte potential", "foo_i","100"));
@ -64,6 +69,11 @@ public class TestExtendedDismaxParser extends AbstractSolrTestCase {
String twor = "*[count(//doc)=2]";
String nor = "*[count(//doc)=0]";
assertQ("expected doc is missing (using un-escaped edismax w/qf)",
req("q", "literal:colon",
"qf", "t_special",
"defType", "edismax"),
"//doc[1]/str[@name='id'][.='46']");
assertQ("standard request handler returns all matches",
req(allq),
@ -164,6 +174,58 @@ public class TestExtendedDismaxParser extends AbstractSolrTestCase {
"q","the big"), oner
);
// searching for a literal colon value when clearly not used for a field
assertQ("expected doc is missing (using standard)",
req("q", "t_special:literal\\:colon"),
"//doc[1]/str[@name='id'][.='46']");
assertQ("expected doc is missing (using escaped edismax w/field)",
req("q", "t_special:literal\\:colon",
"defType", "edismax"),
"//doc[1]/str[@name='id'][.='46']");
assertQ("expected doc is missing (using un-escaped edismax w/field)",
req("q", "t_special:literal:colon",
"defType", "edismax"),
"//doc[1]/str[@name='id'][.='46']");
assertQ("expected doc is missing (using escaped edismax w/qf)",
req("q", "literal\\:colon",
"qf", "t_special",
"defType", "edismax"),
"//doc[1]/str[@name='id'][.='46']");
assertQ("expected doc is missing (using un-escaped edismax w/qf)",
req("q", "literal:colon",
"qf", "t_special",
"defType", "edismax"),
"//doc[1]/str[@name='id'][.='46']");
assertQ(req("defType","edismax", "mm","100%", "q","terminator:3", "qf","movies_t"),
oner);
assertQ(req("defType","edismax", "mm","100%", "q","Mission:Impossible", "qf","movies_t"),
oner);
assertQ(req("defType","edismax", "mm","100%", "q","Mission : Impossible", "qf","movies_t"),
oner);
assertQ(req("defType","edismax", "mm","100%", "q","Mission: Impossible", "qf","movies_t"),
oner);
assertQ(req("defType","edismax", "mm","100%", "q","Terminator 2: Judgement Day", "qf","movies_t"),
oner);
// make sure the clause wasn't eliminated
assertQ(req("defType","edismax", "mm","100%", "q","Terminator 10: Judgement Day", "qf","movies_t"),
nor);
// throw in a numeric field
assertQ(req("defType","edismax", "mm","0", "q","Terminator: 100", "qf","movies_t foo_i"),
twor);
assertQ(req("defType","edismax", "mm","100%", "q","Terminator: 100", "qf","movies_t foo_i"),
nor);
assertQ(req("defType","edismax", "mm","100%", "q","Terminator: 8", "qf","movies_t foo_i"),
oner);
assertQ(req("defType","edismax", "mm","0", "q","movies_t:Terminator 100", "qf","movies_t foo_i"),
twor);
/** stopword removal in conjunction with multi-word synonyms at query time
* break this test.
// multi-word synonyms