From 2b65fd00e62c11dad71b691a255942769b4e560e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B8ydahl?= Date: Sat, 10 Mar 2012 09:42:41 +0000 Subject: [PATCH] SOLR-3026: eDismax: Locking down which fields can be explicitly queried (user fields aka uf) git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1299172 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 3 + .../search/ExtendedDismaxQParserPlugin.java | 334 +++++++++++++++--- .../solr/search/TestExtendedDismaxParser.java | 174 ++++++++- 3 files changed, 446 insertions(+), 65 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index d75656d66f9..7b87eb8e750 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -536,6 +536,9 @@ New Features * SOLR-2202: Currency FieldType, whith support for currencies and exchange rates (Greg Fodor & Andrew Morrison via janhoy, rmuir, Uwe Schindler) +* SOLR-3026: eDismax: Locking down which fields can be explicitly queried (user fields aka uf) + (janhoy, hossmann, Tomás Fernández Löbbe) + Optimizations ---------------------- * SOLR-1931: Speedup for LukeRequestHandler and admin/schema browser. New parameter diff --git a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java index 55e15508f35..b74e924ae25 100755 --- a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java @@ -22,6 +22,16 @@ package org.apache.solr.search; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.queries.function.BoostedQuery; import org.apache.lucene.queries.function.FunctionQuery; import org.apache.lucene.queries.function.ValueSource; @@ -30,7 +40,6 @@ import org.apache.lucene.queries.function.valuesource.QueryValueSource; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.*; -import org.apache.lucene.analysis.Analyzer; import org.apache.solr.common.params.DisMaxParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; @@ -39,8 +48,6 @@ import org.apache.solr.schema.FieldType; import org.apache.solr.util.SolrPluginUtils; import org.apache.solr.analysis.*; -import java.util.*; - /** * An advanced multi-field query parser. * @lucene.experimental @@ -74,7 +81,10 @@ class ExtendedDismaxQParser extends QParser { /** shorten the class references for utilities */ private static interface DMP extends DisMaxParams { - /* :NOOP */ + /** + * User fields. The fields that can be used by the end user to create field-specific queries. + */ + public static String UF = "uf"; } @@ -82,15 +92,30 @@ class ExtendedDismaxQParser extends QParser { super(qstr, localParams, params, req); } - Map queryFields; - Query parsedUserQuery; + /** + * The field names specified by 'qf' that (most) clauses will + * be queried against + */ + private Map queryFields; + + /** + * The field names specified by 'uf' that users are + * allowed to include literally in their query string. The Float + * boost values will be applied automaticly to any clause using that + * field name. '*' will be treated as an alias for any + * field that exists in the schema. Wildcards are allowed to + * express dynamicFields. + */ + private UserFields userFields; + private Query parsedUserQuery; private String[] boostParams; private String[] multBoosts; private List boostQueries; private Query altUserQuery; private QParser altQParser; + private SolrParams solrParams; @Override @@ -98,11 +123,13 @@ class ExtendedDismaxQParser extends QParser { SolrParams localParams = getLocalParams(); SolrParams params = getParams(); - SolrParams solrParams = SolrParams.wrapDefaults(localParams, params); + solrParams = SolrParams.wrapDefaults(localParams, params); final String minShouldMatch = DisMaxQParser.parseMinShouldMatch(req.getSchema(), solrParams); + userFields = new UserFields(U.parseFieldBoosts(solrParams.getParams(DMP.UF))); + queryFields = SolrPluginUtils.parseFieldBoosts(solrParams.getParams(DisMaxParams.QF)); if (0 == queryFields.size()) { queryFields.put(req.getSchema().getDefaultSearchFieldName(), 1.0f); @@ -158,75 +185,59 @@ class ExtendedDismaxQParser extends QParser { new ExtendedSolrQueryParser(this, IMPOSSIBLE_FIELD_NAME); up.addAlias(IMPOSSIBLE_FIELD_NAME, tiebreaker, queryFields); + addAliasesFromRequest(up, tiebreaker); up.setPhraseSlop(qslop); // slop for explicit user phrase queries up.setAllowLeadingWildcard(true); // defer escaping and only do if lucene parsing fails, or we need phrases // parsing fails. Need to sloppy phrase queries anyway though. List clauses = null; - boolean specialSyntax = false; int numPluses = 0; int numMinuses = 0; - int numOptional = 0; - int numAND = 0; int numOR = 0; int numNOT = 0; - boolean sawLowerAnd=false; - boolean sawLowerOr=false; clauses = splitIntoClauses(userQuery, false); for (Clause clause : clauses) { - if (!clause.isPhrase && clause.hasSpecialSyntax) { - specialSyntax = true; - } if (clause.must == '+') numPluses++; if (clause.must == '-') numMinuses++; if (clause.isBareWord()) { String s = clause.val; - if ("AND".equals(s)) { - numAND++; - } else if ("OR".equals(s)) { + if ("OR".equals(s)) { numOR++; } else if ("NOT".equals(s)) { numNOT++; - } else if (lowercaseOperators) { - if ("and".equals(s)) { - numAND++; - sawLowerAnd=true; - } else if ("or".equals(s)) { - numOR++; - sawLowerOr=true; - } + } else if (lowercaseOperators && "or".equals(s)) { + numOR++; } } } - numOptional = clauses.size() - (numPluses + numMinuses); - // convert lower or mixed case operators to uppercase if we saw them. + // Always rebuild mainUserQuery from clauses to catch modifications from splitIntoClauses + // This was necessary for userFields modifications to get propagated into the query. + // Convert lower or mixed case operators to uppercase if we saw them. // only do this for the lucene query part and not for phrase query boosting // since some fields might not be case insensitive. // We don't use a regex for this because it might change and AND or OR in // a phrase query in a case sensitive field. - if (sawLowerAnd || sawLowerOr) { - StringBuilder sb = new StringBuilder(); - for (int i=0; i0 && i+10 && i+1 it = solrParams.getParameterNamesIterator(); + while(it.hasNext()) { + String param = it.next(); + if(param.startsWith("f.") && param.endsWith(".qf")) { + // Add the alias + String fname = param.substring(2,param.length()-3); + String qfReplacement = solrParams.get(param); + Map parsedQf = SolrPluginUtils.parseFieldBoosts(qfReplacement); + if(parsedQf.size() == 0) + return; + up.addAlias(fname, tiebreaker, parsedQf); + } + } + } + /** * Modifies the main query by adding a new optional Query consisting * of shingled phrase queries across the specified clauses using the @@ -598,6 +636,7 @@ class ExtendedDismaxQParser extends QParser { int end=s.length(); char ch=0; int start; + boolean disallowUserField = false; outer: while (pos < end) { ch = s.charAt(pos); @@ -614,6 +653,10 @@ class ExtendedDismaxQParser extends QParser { } clause.field = getFieldName(s, pos, end); + if(clause.field != null && !userFields.isAllowed(clause.field)) { + disallowUserField = true; + clause.field = null; + } if (clause.field != null) { pos += clause.field.length(); // skip the field name pos++; // skip the ':' @@ -709,15 +752,31 @@ class ExtendedDismaxQParser extends QParser { } if (clause != null) { - clause.raw = s.substring(start, pos); + if(disallowUserField) { + clause.raw = clause.val; + } else { + clause.raw = s.substring(start, pos); + // Add default userField boost if no explicit boost exists + if(userFields.isAllowed(clause.field) && !clause.raw.contains("^")) { + Float boost = userFields.getBoost(clause.field); + if(boost != null) + clause.raw += "^" + boost; + } + } lst.add(clause); } clause = new Clause(); + disallowUserField = false; } return lst; } + /** + * returns a field name or legal field alias from the current + * position of the string + * @param solrParams + */ public String getFieldName(String s, int pos, int end) { if (pos >= end) return null; int p=pos; @@ -731,10 +790,12 @@ class ExtendedDismaxQParser extends QParser { if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) return null; } String fname = s.substring(pos, p); - return getReq().getSchema().getFieldTypeNoEx(fname) == null ? null : fname; + boolean isInSchema = getReq().getSchema().getFieldTypeNoEx(fname) != null; + boolean isAlias = solrParams.get("f."+fname+".qf") != null; + + return (isInSchema || isAlias) ? fname : null; } - public static List split(String s, boolean ignoreQuote) { ArrayList lst = new ArrayList(4); int pos=0, start=0, end=s.length(); @@ -792,7 +853,7 @@ class ExtendedDismaxQParser extends QParser { * A subclass of SolrQueryParser that supports aliasing fields for * constructing DisjunctionMaxQueries. */ - class ExtendedSolrQueryParser extends SolrQueryParser { + static class ExtendedSolrQueryParser extends SolrQueryParser { /** A simple container for storing alias info @@ -870,6 +931,16 @@ class ExtendedDismaxQParser extends QParser { a.fields = fieldBoosts; aliases.put(field, a); } + + /** + * Returns the aliases found for a field. + * Returns null if there are no aliases for the field + * @param field + * @return + */ + public Alias getAlias(String field) { + return aliases.get(field); + } QType type; @@ -980,9 +1051,9 @@ class ExtendedDismaxQParser extends QParser { * DisjunctionMaxQuery. (so yes: aliases which point at other * aliases should work) */ - protected Query getAliasedQuery() - throws ParseException { + protected Query getAliasedQuery() throws ParseException { Alias a = aliases.get(field); + this.validateCyclicAliasing(field); if (a != null) { List lst = getQueries(a); if (lst == null || lst.size()==0) @@ -1018,15 +1089,43 @@ class ExtendedDismaxQParser extends QParser { } } + /** + * Validate there is no cyclic referencing in the aliasing + */ + private void validateCyclicAliasing(String field) throws ParseException { + Set set = new HashSet(); + set.add(field); + if(validateField(field, set)) { + throw new ParseException("Field aliases lead to a cycle"); + } + } + + private boolean validateField(String field, Set set) { + if(this.getAlias(field) == null) { + return false; + } + boolean hascycle = false; + for(String referencedField:this.getAlias(field).fields.keySet()) { + if(!set.add(referencedField)) { + hascycle = true; + } else { + if(validateField(referencedField, set)) { + hascycle = true; + } + set.remove(referencedField); + } + } + return hascycle; + } - protected List getQueries(Alias a) throws ParseException { + protected List getQueries(Alias a) throws ParseException { if (a == null) return null; if (a.fields.size()==0) return null; List lst= new ArrayList(4); for (String f : a.fields.keySet()) { this.field = f; - Query sub = getQuery(); + Query sub = getAliasedQuery(); if (sub != null) { Float boost = a.fields.get(f); if (boost != null) { @@ -1127,4 +1226,129 @@ class ExtendedDismaxQParser extends QParser { if (q instanceof BooleanQuery && ((BooleanQuery)q).clauses().size()==0) return true; return false; } + + /** + * Class that encapsulates the input from userFields parameter and can answer whether + * a field allowed or disallowed as fielded query in the query string + */ + static class UserFields { + private Map userFieldsMap; + private DynamicField[] dynamicUserFields; + private DynamicField[] negativeDynamicUserFields; + + UserFields(Map ufm) { + userFieldsMap = ufm; + if (0 == userFieldsMap.size()) { + userFieldsMap.put("*", null); + } + + // Process dynamic patterns in userFields + ArrayList dynUserFields = new ArrayList(); + ArrayList negDynUserFields = new ArrayList(); + for(String f : userFieldsMap.keySet()) { + if(f.contains("*")) { + if(f.startsWith("-")) + negDynUserFields.add(new DynamicField(f.substring(1))); + else + dynUserFields.add(new DynamicField(f)); + } + } + Collections.sort(dynUserFields); + dynamicUserFields = dynUserFields.toArray(new DynamicField[dynUserFields.size()]); + Collections.sort(negDynUserFields); + negativeDynamicUserFields = negDynUserFields.toArray(new DynamicField[negDynUserFields.size()]); +// System.out.println("** userF="+userFieldsMap+", dynUF="+Arrays.toString(dynamicUserFields)+", negDynUF="+Arrays.toString(negativeDynamicUserFields)); + } + + /** + * Is the given field name allowed according to UserFields spec given in the uf parameter? + * @param fname the field name to examine + * @return true if the fielded queries are allowed on this field + */ + public boolean isAllowed(String fname) { + boolean res = ((userFieldsMap.containsKey(fname) || isDynField(fname, false)) && + !userFieldsMap.containsKey("-"+fname) && + !isDynField(fname, true)); + return res; + } + + private boolean isDynField(String field, boolean neg) { + return getDynFieldForName(field, neg) == null ? false : true; + } + + private String getDynFieldForName(String f, boolean neg) { + for( DynamicField df : neg?negativeDynamicUserFields:dynamicUserFields ) { + if( df.matches( f ) ) return df.wildcard; + } + return null; + } + + /** + * Finds the default user field boost associated with the given field. + * This is parsed from the uf parameter, and may be specified as wildcards, e.g. *name^2.0 or *^3.0 + * @param field the field to find boost for + * @return the float boost value associated with the given field or a wildcard matching the field + */ + public Float getBoost(String field) { + return (userFieldsMap.containsKey(field)) ? + userFieldsMap.get(field) : // Exact field + userFieldsMap.get(getDynFieldForName(field, false)); // Dynamic field + } + } + + /* Represents a dynamic field, for easier matching, inspired by same class in IndexSchema */ + static class DynamicField implements Comparable { + final static int STARTS_WITH=1; + final static int ENDS_WITH=2; + final static int CATCHALL=3; + + final String wildcard; + final int type; + + final String str; + + protected DynamicField(String wildcard) { + this.wildcard = wildcard; + if (wildcard.equals("*")) { + type=CATCHALL; + str=null; + } + else if (wildcard.startsWith("*")) { + type=ENDS_WITH; + str=wildcard.substring(1); + } + else if (wildcard.endsWith("*")) { + type=STARTS_WITH; + str=wildcard.substring(0,wildcard.length()-1); + } + else { + throw new RuntimeException("dynamic field name must start or end with *"); + } + } + + /* + * Returns true if the regex wildcard for this DynamicField would match the input field name + */ + public boolean matches(String name) { + if (type==CATCHALL) return true; + else if (type==STARTS_WITH && name.startsWith(str)) return true; + else if (type==ENDS_WITH && name.endsWith(str)) return true; + else return false; + } + + /** + * Sort order is based on length of regex. Longest comes first. + * @param other The object to compare to. + * @return a negative integer, zero, or a positive integer + * as this object is less than, equal to, or greater than + * the specified object. + */ + public int compareTo(DynamicField other) { + return other.wildcard.length() - wildcard.length(); + } + + public String toString() { + return this.wildcard; + } + } } diff --git a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java index 9cf0f03449c..b49935e190f 100755 --- a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java +++ b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java @@ -17,6 +17,9 @@ package org.apache.solr.search; +import java.io.IOException; + +import org.apache.solr.common.SolrException; import org.apache.solr.util.AbstractSolrTestCase; public class TestExtendedDismaxParser extends AbstractSolrTestCase { @@ -31,16 +34,6 @@ public class TestExtendedDismaxParser extends AbstractSolrTestCase { // if you override setUp or tearDown, you better call // the super classes version super.setUp(); - } - @Override - public void tearDown() throws Exception { - // if you override setUp or tearDown, you better call - // the super classes version - super.tearDown(); - } - - // test the edismax query parser based on the dismax parser - public void testFocusQueryParser() { assertU(adoc("id", "42", "trait_ss", "Tool", "trait_ss", "Obnoxious", "name", "Zapp Brannigan")); assertU(adoc("id", "43" , @@ -63,6 +56,16 @@ public class TestExtendedDismaxParser extends AbstractSolrTestCase { assertU(adoc("id", "50", "text_sw", "start new big city end")); assertU(commit()); + } + @Override + public void tearDown() throws Exception { + // if you override setUp or tearDown, you better call + // the super classes version + super.tearDown(); + } + + // test the edismax query parser based on the dismax parser + public void testFocusQueryParser() { String allq = "id:[42 TO 50]"; String allr = "*[count(//doc)=9]"; String oner = "*[count(//doc)=1]"; @@ -249,4 +252,155 @@ public class TestExtendedDismaxParser extends AbstractSolrTestCase { } + public void testUserFields() { + String oner = "*[count(//doc)=1]"; + String nor = "*[count(//doc)=0]"; + + // User fields + // Default is allow all "*" + // If a list of fields are given, only those are allowed "foo bar" + // Possible to invert with "-" syntax: + // Disallow all: "-*" + // Allow all but id: "* -id" + // Also supports "dynamic" field name wildcarding + assertQ(req("defType","edismax", "q","id:42"), + oner); + + assertQ(req("defType","edismax", "uf","*", "q","id:42"), + oner); + + assertQ(req("defType","edismax", "uf","id", "q","id:42"), + oner); + + assertQ(req("defType","edismax", "uf","-*", "q","id:42"), + nor); + + assertQ(req("defType","edismax", "uf","loremipsum", "q","id:42"), + nor); + + assertQ(req("defType","edismax", "uf","* -id", "q","id:42"), + nor); + + assertQ(req("defType","edismax", "uf","* -loremipsum", "q","id:42"), + oner); + + assertQ(req("defType","edismax", "uf","id^5.0", "q","id:42"), + oner); + + assertQ(req("defType","edismax", "uf","*^5.0", "q","id:42"), + oner); + + assertQ(req("defType","edismax", "uf","id^5.0", "q","id:42^10.0"), + oner); + + assertQ(req("defType","edismax", "uf","na*", "q","name:Zapp"), + oner); + + assertQ(req("defType","edismax", "uf","*me", "q","name:Zapp"), + oner); + + assertQ(req("defType","edismax", "uf","* -na*", "q","name:Zapp"), + nor); + + assertQ(req("defType","edismax", "uf","*me -name", "q","name:Zapp"), + nor); + + assertQ(req("defType","edismax", "uf","*ame -*e", "q","name:Zapp"), + nor); + + // Boosts from user fields + assertQ(req("defType","edismax", "debugQuery","true", "rows","0", "q","id:42"), + "//str[@name='parsedquery_toString'][.='+id:42']"); + + assertQ(req("defType","edismax", "debugQuery","true", "rows","0", "uf","*^5.0", "q","id:42"), + "//str[@name='parsedquery_toString'][.='+id:42^5.0']"); + + assertQ(req("defType","edismax", "debugQuery","true", "rows","0", "uf","*^2.0 id^5.0 -xyz", "q","name:foo"), + "//str[@name='parsedquery_toString'][.='+name:foo^2.0']"); + + assertQ(req("defType","edismax", "debugQuery","true", "rows","0", "uf","i*^5.0", "q","id:42"), + "//str[@name='parsedquery_toString'][.='+id:42^5.0']"); + + + assertQ(req("defType","edismax", "uf","-*", "q","cannons"), + oner); + + assertQ(req("defType","edismax", "uf","* -id", "q","42", "qf", "id"), oner); + + } + + public void testAliasing() throws IOException, Exception { + String oner = "*[count(//doc)=1]"; + String twor = "*[count(//doc)=2]"; + String nor = "*[count(//doc)=0]"; + + // Aliasing + // Single field + assertQ(req("defType","edismax", "q","myalias:Zapp"), + nor); + + assertQ(req("defType","edismax", "q","myalias:Zapp", "f.myalias.qf","name"), + oner); + + // Multi field + assertQ(req("defType","edismax", "uf", "myalias", "q","myalias:(Zapp Obnoxious)", "f.myalias.qf","name^2.0 mytrait_ss^5.0", "mm", "50%"), + oner); + + // Multi field + assertQ(req("defType","edismax", "q","Zapp Obnoxious", "f.myalias.qf","name^2.0 mytrait_ss^5.0"), + nor); + + assertQ(req("defType","edismax", "q","Zapp Obnoxious", "qf","myalias^10.0", "f.myalias.qf","name^2.0 mytrait_ss^5.0"), oner); + assertQ(req("defType","edismax", "q","Zapp Obnoxious", "qf","myalias^10.0", "f.myalias.qf","name^2.0 trait_ss^5.0"), twor); + assertQ(req("defType","edismax", "q","Zapp Obnoxious", "qf","myalias^10.0", "f.myalias.qf","name^2.0 trait_ss^5.0", "mm", "100%"), oner); + assertQ(req("defType","edismax", "q","Zapp Obnoxious", "qf","who^10.0 where^3.0", "f.who.qf","name^2.0", "f.where.qf", "mytrait_ss^5.0"), oner); + + assertQ(req("defType","edismax", "q","Zapp Obnoxious", "qf","myalias", "f.myalias.qf","name mytrait_ss", "uf", "myalias"), oner); + + assertQ(req("defType","edismax", "uf","who", "q","who:(Zapp Obnoxious)", "f.who.qf", "name^2.0 trait_ss^5.0", "qf", "id"), twor); + assertQ(req("defType","edismax", "uf","* -name", "q","who:(Zapp Obnoxious)", "f.who.qf", "name^2.0 trait_ss^5.0"), twor); + + } + + public void testAliasingBoost() throws IOException, Exception { + assertQ(req("defType","edismax", "q","Zapp Pig", "qf","myalias", "f.myalias.qf","name trait_ss^0.5"), "//result/doc[1]/str[@name='id']=42", "//result/doc[2]/str[@name='id']=47");//doc 42 should score higher than 46 + assertQ(req("defType","edismax", "q","Zapp Pig", "qf","myalias^100 name", "f.myalias.qf","trait_ss^0.5"), "//result/doc[1]/str[@name='id']=47", "//result/doc[2]/str[@name='id']=42");//Now the order should be inverse + } + + public void testCyclicAliasing() throws IOException, Exception { + try { + h.query(req("defType","edismax", "q","Zapp Pig", "qf","who", "f.who.qf","name","f.name.qf","who")); + fail("Simple cyclic alising"); + } catch (SolrException e) { + assertTrue(e.getCause().getMessage().contains("Field aliases lead to a cycle")); + } + + try { + h.query(req("defType","edismax", "q","Zapp Pig", "qf","who", "f.who.qf","name","f.name.qf","myalias", "f.myalias.qf","who")); + fail(); + } catch (SolrException e) { + assertTrue(e.getCause().getMessage().contains("Field aliases lead to a cycle")); + } + + try { + h.query(req("defType","edismax", "q","Zapp Pig", "qf","field1", "f.field1.qf","field2 field3","f.field2.qf","field4 field5", "f.field4.qf","field5", "f.field5.qf","field6", "f.field3.qf","field6")); + } catch (SolrException e) { + fail("This is not cyclic alising"); + } + + try { + h.query(req("defType","edismax", "q","Zapp Pig", "qf","field1", "f.field1.qf","field2 field3", "f.field2.qf","field4 field5", "f.field4.qf","field5", "f.field5.qf","field4")); + fail(); + } catch (SolrException e) { + assertTrue(e.getCause().getMessage().contains("Field aliases lead to a cycle")); + } + + try { + h.query(req("defType","edismax", "q","who:(Zapp Pig)", "qf","field1", "f.who.qf","name","f.name.qf","myalias", "f.myalias.qf","who")); + fail(); + } catch (SolrException e) { + assertTrue(e.getCause().getMessage().contains("Field aliases lead to a cycle")); + } + } + }