new DisMaxRequestHandler as well as some generic SolrPluginUtils

git-svn-id: https://svn.apache.org/repos/asf/incubator/solr/trunk@408103 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2006-05-20 22:17:21 +00:00
parent 375d553731
commit d63338bd35
7 changed files with 1793 additions and 9 deletions

View File

@ -180,14 +180,43 @@
--> -->
<requestHandler name="standard" class="solr.StandardRequestHandler" /> <requestHandler name="standard" class="solr.StandardRequestHandler" />
<!-- example of a request handler with custom parameters passed to it's init() <!-- DisMaxRequestHandler is an example of a request handler that
<requestHandler name="example" class="myorg.mypkg.MyRequestHandler" > supports optional parameters which are passed to
<int name="myparam">1000</int> it's init() method.
<float name="ratio">1.4142135</float>
<arr name="myarr"><int>1</int><int>2</int></arr>
<str>foo</str>
</requestHandler>
--> -->
<requestHandler name="dismax" class="solr.DisMaxRequestHandler" >
<float name="tie">0.01</float>
<str name="qf">
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
</str>
<str name="pf">
text^0.2 features^1.1 name^1.5 manu^1.4 manu_exact^1.9
</str>
<str name="bf">
ord(popularity)^0.5 recip(rord(price),1,1000,1000)^0.3
</str>
<str name="fl">
id,name,price,score
</str>
<str name="mm">
2&lt;-1 5&lt;-2 6&lt;90%
</str>
<int name="ps">100</int>
</requestHandler>
<!-- Note how you can register the same handler multiple times with
different names (and different init parameters)
-->
<requestHandler name="instock" class="solr.DisMaxRequestHandler" >
<str name="fq">
inStock:true
</str>
<str name="qf">
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
</str>
<str name="mm">
2&lt;-1 5&lt;-2 6&lt;90%
</str>
</requestHandler>
<!-- config for the admin interface --> <!-- config for the admin interface -->
<admin> <admin>

View File

@ -0,0 +1,374 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.request;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.core.SolrException;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocListAndSet;
import org.apache.solr.search.SolrCache;
import org.apache.solr.search.SolrQueryParser;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.CacheRegenerator;
import org.apache.solr.request.StandardRequestHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.FieldType;
import org.apache.solr.util.StrUtils;
import org.apache.solr.util.NamedList;
import org.apache.solr.util.SolrPluginUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.analysis.Analyzer;
import org.xmlpull.v1.XmlPullParserException;
/* this is the standard logging framework for Solr */
import java.util.logging.Logger;
import java.util.logging.Level;
import java.util.logging.Handler;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import java.util.Collection;
import java.util.Set;
import java.util.HashSet;
import java.util.Map;
import java.util.HashMap;
import java.util.regex.Pattern;
import java.io.IOException;
import java.net.URL;
/**
* <p>
* A Generic query plugin designed to be given a simple query expression
* from a user, which it will then query against a variety of
* pre-configured fields, in a variety of ways, using BooleanQueries,
* DisjunctionMaxQueries, and PhraseQueries.
* </p>
*
* <p>
* All of the following options may be configured for this plugin
* in the solrconfig as defaults, and may be overridden as request parameters.
* </p>
*
* <ul>
* <li>tie - (Tie breaker) float value to use as tiebreaker in
* DisjunctionMaxQueries (should be something much less than 1)
* </li>
* <li> qf - (Query Fields) fields and boosts to use when building
* DisjunctionMaxQueries from the users query. Format is:
* "<code>fieldA^1.0 fieldB^2.2</code>".
* </li>
* <li> mm - (Minimum Match) this supports a wide variety of
* complex expressions.
* read {@link SolrPluginUtils#setMinShouldMatch SolrPluginUtils.setMinShouldMatch} for full details.
* </li>
* <li> pf - (Phrase Fields) fields/boosts to make phrase queries out
* of to boost
* the users query for exact matches on the specified fields.
* Format is: "<code>fieldA^1.0 fieldB^2.2</code>".
* </li>
* <li> ps - (Phrase Slop) amount of slop on phrase queries built for pf
* fields.
* </li>
* <li> bq - (Boost Query) a raw lucene query that will be included in the
* users query to influence the score. If this is a BooleanQuery
* with a default boost (1.0f) then the individual clauses will be
* added directly to the main query. Otherwise the query will be
* included as is.
* </li>
* <li> bf - (Boost Functions) functions (with optional boosts) that will be
* included in the users query to influence the score.
* Format is: "<code>funcA(arg1,arg2)^1.2
* funcB(arg3,arg4)^2.2</code>". NOTE: Whitespace is not allowed
* in the function arguments.
* </li>
* <li> fq - (Filter Query) a raw lucene query that can be used
* to restrict the super set of products we are interested in - more
* efficient than using bq, but doesn't influence score.
* </li>
* </ul>
*
* <p>
* The following options are only available as request params...
* </p>
*
* <ul>
* <li> q - (Query) the raw unparsed, unescaped, query from the user.
* </li>
* <li>sort - (Order By) list of fields and direction to sort on.
* </li>
* </ul>
*/
public class DisMaxRequestHandler
implements SolrRequestHandler, SolrInfoMBean {
/**
 * A field we can't ever find in any schema, so we can safely tell
 * DisjunctionMaxQueryParser to use it as our defaultField, and
 * map aliases from it to any field in our schema.
 * Declared final: it is a constant and must never be reassigned.
 */
private static final String IMPOSSIBLE_FIELD_NAME = "\uFFFC\uFFFC\uFFFC";

// statistics
// TODO: should we bother synchronizing these, or is an off-by-one error
// acceptable every million requests or so?
/** number of calls made to handleRequest */
long numRequests;
/** number of handleRequest calls that ended in the outer catch block */
long numErrors;

/** shortens the class references for the SolrPluginUtils utilities */
private static class U extends SolrPluginUtils {
  /* :NOOP */
}

/** configured defaults; filled in by init and consulted on every request */
protected final U.CommonParams params = new U.CommonParams();

/** Creates a handler whose params still hold the CommonParams defaults. */
public DisMaxRequestHandler() {
  super();
}
/**
 * Returns URLs to the Wiki pages for this handler; currently there are
 * none, so the array is always empty (:TODO: need docs).
 */
public URL[] getDocs() {
  final URL[] none = new URL[0];
  return none;
}

/** Returns the name of the runtime class of this handler. */
public String getName() {
  return getClass().getName();
}

/** Returns the "requests" and "errors" counters, in that order. */
public NamedList getStatistics() {
  final NamedList stats = new NamedList();
  stats.add("requests", numRequests);
  stats.add("errors", numErrors);
  return stats;
}

/** Returns the version keyword string for this source file. */
public String getVersion() {
  return "$Revision:$";
}

/** Returns a one sentence, human readable description of the handler. */
public String getDescription() {
  final String what =
    "DisjunctionMax Request Handler: Does relevancy based queries "
    + "accross a variety of fields using configured boosts";
  return what;
}

/** This plugin always reports itself in the QUERYHANDLER category. */
public Category getCategory() {
  return Category.QUERYHANDLER;
}

/** Returns the id keyword string for this source file. */
public String getSourceId() {
  return "$Id:$";
}

/** Returns the URL keyword string for this source file. */
public String getSource() {
  return "$URL:$";
}
/**
 * Initializes the default option values of this handler from the
 * init args found in the config, by handing them to
 * {@link SolrPluginUtils.CommonParams#setValues}.
 *
 * @param args the init params configured for this handler
 */
public void init(NamedList args) {
  this.params.setValues(args);
}
/**
 * Builds and executes the DisjunctionMax style query for one request.
 *
 * <p>
 * The main query is a BooleanQuery made of: the parsed user query
 * (required), a phrase version of the user query (optional), the clauses
 * of the boost query "bq" (if any) and the boost functions "bf" (if any).
 * The filter query "fq" (if any) only restricts the result set.
 * The resulting DocList is added to the response as "search-results",
 * and debugging info is added as "debug" when doStandardDebug
 * returns a non-null list.
 * </p>
 *
 * <p>
 * Any Exception thrown while handling the request is logged, set on the
 * response, and counted in numErrors; it is never propagated.
 * </p>
 *
 * @param req the request; per-request params override the init defaults
 * @param rsp the response that receives the results (or the exception)
 */
public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
numRequests++;
try {
SolrIndexSearcher s = req.getSearcher();
IndexSchema schema = req.getSchema();
/* request params win over the defaults captured by init() */
Map<String,Float> queryFields =
U.parseFieldBoosts(U.getParam(req, params.QF, params.qf));
Map<String,Float> phraseFields =
U.parseFieldBoosts(U.getParam(req, params.PF, params.pf));
float tiebreaker = U.getNumberParam
(req, params.TIE, params.tiebreaker).floatValue();
int pslop = U.getNumberParam(req, params.PS, params.pslop).intValue();
/* a generic parser for parsing regular lucene queries */
QueryParser p = new SolrQueryParser(schema, null);
/* a parser for dealing with user input, which will convert
* things to DisjunctionMaxQueries
*/
U.DisjunctionMaxQueryParser up =
new U.DisjunctionMaxQueryParser(schema, IMPOSSIBLE_FIELD_NAME);
up.addAlias(IMPOSSIBLE_FIELD_NAME,
tiebreaker, queryFields);
/* for parsing sloppy phrases using DisjunctionMaxQueries */
U.DisjunctionMaxQueryParser pp =
new U.DisjunctionMaxQueryParser(schema, IMPOSSIBLE_FIELD_NAME);
pp.addAlias(IMPOSSIBLE_FIELD_NAME,
tiebreaker, phraseFields);
pp.setPhraseSlop(pslop);
/* * * Main User Query * * */
/* quotes are dropped first if they are unbalanced, then everything
* except '"', '-' and '+' is escaped
*/
String userQuery = U.partialEscape
(U.stripUnbalancedQuotes(req.getQueryString())).toString();
/* the main query we will execute. we disable the coord because
* this query is an artificial construct
*/
BooleanQuery query = new BooleanQuery(true);
String minShouldMatch = U.getParam(req, params.MM, params.mm);
Query dis = up.parse(userQuery);
if (dis instanceof BooleanQuery) {
/* flatten first, so that the "mm" spec is applied to the flattened
* optional clauses
*/
BooleanQuery t = new BooleanQuery();
U.flatenBooleanQuery(t, (BooleanQuery)dis);
U.setMinShouldMatch(t, minShouldMatch);
query.add(t, Occur.MUST);
} else {
query.add(dis, Occur.MUST);
}
/* * * Add on Phrases for the Query * * */
/* build up phrase boosting queries */
/* if the userQuery already has some quotes, strip them out.
* we've already done the phrases they asked for in the main
* part of the query, this is to boost docs that may not have
* matched those phrases but do match looser phrases.
*/
String userPhraseQuery = userQuery.replace("\"","");
Query phrase = pp.parse("\"" + userPhraseQuery + "\"");
if (null != phrase) {
query.add(phrase, Occur.SHOULD);
}
/* * * Boosting Query * * */
String boostQuery = U.getParam(req, params.BQ, params.bq);
if (null != boostQuery && !boostQuery.equals("")) {
Query tmp = p.parse(boostQuery);
/* if the default boost was used, and we've got a BooleanQuery
* extract the subqueries out and use them directly
*/
if (1.0f == tmp.getBoost() && tmp instanceof BooleanQuery) {
for (BooleanClause c : ((BooleanQuery)tmp).getClauses()) {
query.add(c);
}
} else {
query.add(tmp, BooleanClause.Occur.SHOULD);
}
}
/* * * Boosting Functions * * */
String boostFunc = U.getParam(req, params.BF, params.bf);
if (null != boostFunc && !boostFunc.equals("")) {
List<Query> funcs = U.parseFuncs(schema, boostFunc);
for (Query f : funcs) {
query.add(f, Occur.SHOULD);
}
}
/* * * Restrict Results * * */
List<Query> restrictions = new ArrayList<Query>(1);
/* User Restriction */
String filterQueryString = U.getParam(req, params.FQ, params.fq);
Query filterQuery = null;
if (null != filterQueryString && !filterQueryString.equals("")) {
filterQuery = p.parse(filterQueryString);
restrictions.add(filterQuery);
}
/* * * Generate Main Results * * */
DocList results = s.getDocList(query, restrictions,
SolrPluginUtils.getSort(req),
req.getStart(), req.getLimit(),
SolrIndexSearcher.GET_SCORES);
rsp.add("search-results",results);
U.setReturnFields(U.getParam(req, params.FL, params.fl), rsp);
/* * * Debugging Info * * */
/* a failure while building the debug info must not fail the request:
* it is logged and reported in the response instead
*/
try {
NamedList debug = U.doStandardDebug(req, userQuery, query, results);
if (null != debug) {
debug.add("boostquery", boostQuery);
debug.add("boostfunc", boostFunc);
debug.add("filterquery", filterQueryString);
if (null != filterQuery) {
debug.add("parsedfilterquery",
QueryParsing.toString(filterQuery, schema));
}
rsp.add("debug", debug);
}
} catch (Exception e) {
SolrException.logOnce(SolrCore.log,
"Exception durring debug", e);
rsp.add("exception_during_debug", SolrException.toStr(e));
}
} catch (Exception e) {
/* anything that went wrong is logged, reported and counted */
SolrException.log(SolrCore.log,e);
rsp.setException(e);
numErrors++;
}
}
}

View File

@ -0,0 +1,823 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.core.SolrException;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocListAndSet;
import org.apache.solr.search.SolrCache;
import org.apache.solr.search.SolrQueryParser;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.CacheRegenerator;
import org.apache.solr.request.StandardRequestHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.FieldType;
import org.apache.solr.util.StrUtils;
import org.apache.solr.util.NamedList;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.analysis.Analyzer;
import org.xmlpull.v1.XmlPullParserException;
import java.util.logging.Logger;
import java.util.logging.Level;
import java.util.logging.Handler;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import java.util.Collection;
import java.util.Set;
import java.util.HashSet;
import java.util.Map;
import java.util.HashMap;
import java.util.regex.Pattern;
import java.io.IOException;
import java.net.URL;
/**
* <p>Utilities that may be of use to RequestHandlers.</p>
*
* <p>
* Many of these functions have code that was stolen/mutated from
* StandardRequestHandler.
* </p>
*
* <p>:TODO: refactor StandardRequestHandler to use these utilities</p>
*/
public class SolrPluginUtils {
/** standard param for field list */
public static String FL = CommonParams.FL;
/**
 * Counts the documents matching q, restricted to those also matching f.
 *
 * <p>
 * SolrIndexSearcher.numDocs(Query,Query) freaks out if the filtering
 * query is null, so in that case the size of the DocSet for q alone
 * is returned instead.
 * </p>
 */
public static int numDocs(SolrIndexSearcher s, Query q, Query f)
  throws IOException {
  if (f != null) {
    return s.numDocs(q, f);
  }
  return s.getDocSet(q).size();
}
/**
 * Looks up a request param, falling back to the supplied default when
 * the param is not specified or contains nothing but whitespace.
 * A usable value is returned exactly as given (it is not trimmed).
 */
public static String getParam(SolrQueryRequest req,
                              String param, String def) {
  final String raw = req.getParam(param);
  final boolean blank = (raw == null) || raw.trim().length() == 0;
  return blank ? def : raw;
}
/**
 * Reads a request param as a Number (parsed as a Float).
 * The default is returned when the param is missing, blank, or can
 * not be parsed as a number.
 */
public static Number getNumberParam(SolrQueryRequest req,
                                    String param, Number def) {
  final String raw = req.getParam(param);
  if (raw == null || raw.trim().length() == 0) {
    return def;
  }
  try {
    return Float.valueOf(raw);
  } catch (NumberFormatException ignored) {
    /* not a number: fall back to the default */
    return def;
  }
}
private final static Pattern splitList=Pattern.compile(",| ");
/**
 * Sets the return fields of the response from the standard "fl"
 * query param of the request.
 * @see #setReturnFields(String,SolrQueryResponse)
 */
public static void setReturnFields(SolrQueryRequest req,
                                   SolrQueryResponse res) {
  final String fieldList = req.getParam(FL);
  setReturnFields(fieldList, res);
}
/**
 * Given a list of field names separated by commas and/or spaces, sets
 * the field list on the SolrQueryResponse.
 *
 * <p>
 * Empty names (produced by input such as "id, name" or "id,,name") are
 * ignored. If fl is null, or contains no field name at all, the
 * response is left untouched.
 * </p>
 *
 * @param fl the field list, may be null
 * @param res the response to set the return fields on
 */
public static void setReturnFields(String fl,
                                   SolrQueryResponse res) {
  if (fl == null) {
    return;
  }
  // TODO - this could become more efficient if widely used.
  // TODO - should field order be maintained?
  Set<String> names = new HashSet<String>();
  for (String fname : splitList.split(fl.trim(), 0)) {
    /* adjacent separators yield empty tokens, which are not field names */
    if (fname.length() > 0) {
      names.add(fname);
    }
  }
  if (!names.isEmpty()) {
    res.setReturnFields(names);
  }
}
/**
 * <p>
 * Builds a NamedList of the "standard" debugging information, or
 * returns null when the request has no 'debugQuery' param.
 * </p>
 *
 * <p>Entries, in the order they are added:</p>
 * <ul>
 * <li>rawquerystring - the query string exactly as found on the request
 * </li>
 * <li>querystring - the userQuery argument (the query string after any
 *     preprocessing done by the plugin)
 * </li>
 * <li>parsedquery - the main query formatted by the Solr QueryParsing
 *     utils class
 * </li>
 * <li>parsedquery_toString - the main query formatted by its own
 *     toString method
 * </li>
 * <li>explain - the score explanations for each document in results
 *     against query
 * </li>
 * <li>otherQuery - the query string from the 'explainOther' param
 *     (only when that param is non-empty)
 * </li>
 * <li>explainOther - the score explanations, against query, for the
 *     first documents (at most 10) found by 'otherQuery'
 * </li>
 * </ul>
 *
 * @param req the request we are dealing with
 * @param userQuery the users query as a string, after any basic
 *                  preprocessing has been done
 * @param query the query built from the userQuery
 *              (and perhaps other clauses) that identifies the main
 *              result set of the response.
 * @param results the main result set of the response
 * @return the debugging info, or null if debugging was not requested
 */
public static NamedList doStandardDebug(SolrQueryRequest req,
                                        String userQuery,
                                        Query query,
                                        DocList results)
  throws IOException {
  if (req.getParam("debugQuery") == null) {
    return null;
  }
  final NamedList info = new NamedList();

  /* userQuery may have been pre-processed .. expose both forms */
  info.add("rawquerystring", req.getQueryString());
  info.add("querystring", userQuery);

  /* QueryParsing.toString shows converted values, the plain toString
   * shows any attributes of the underlying Query it may have missed.
   */
  info.add("parsedquery", QueryParsing.toString(query, req.getSchema()));
  info.add("parsedquery_toString", query.toString());

  final NamedList mainExplain = getExplainList
    (query, results, req.getSearcher(), req.getSchema());
  info.add("explain", mainExplain);

  final String other = req.getParam("explainOther");
  if (other == null || other.length() == 0) {
    return info;
  }
  final DocList otherDocs = doSimpleQuery
    (other, req.getSearcher(), req.getSchema(), 0, 10);
  info.add("otherQuery", other);
  final NamedList otherExplain = getExplainList
    (query, otherDocs, req.getSearcher(), req.getSchema());
  info.add("explainOther", otherExplain);
  return info;
}
/**
 * Produces one score Explanation per document in a list of docs.
 *
 * <p>
 * Each entry is named "id=KEY,internal_docid=N" (the "id=KEY," part is
 * left out when the schema yields no printable unique key) and its
 * value is a newline followed by the explanation text.
 * </p>
 *
 * @param query The Query you want explanations in the context of
 * @param docs The Documents you want explained relative that query
 */
public static NamedList getExplainList(Query query, DocList docs,
                                       SolrIndexSearcher searcher,
                                       IndexSchema schema)
  throws IOException {
  final NamedList explanations = new NamedList();
  final DocIterator it = docs.iterator();
  for (int n = 0; n < docs.size(); n++) {
    final int docId = it.nextDoc();
    final Explanation why = searcher.explain(query, docId);
    final Document stored = searcher.doc(docId);
    final String key = schema.printableUniqueKey(stored);

    final StringBuilder label = new StringBuilder();
    if (key != null) {
      label.append("id=").append(key).append(",");
    }
    label.append("internal_docid=").append(docId);

    explanations.add(label.toString(), "\n" + why.toString());
  }
  return explanations;
}
/**
 * Executes a basic query in lucene syntax.
 *
 * <p>
 * The request string is split on ';': the first part is the query, the
 * optional second part is a sort specification. A non-negative count in
 * the sort specification replaces the limit.
 * </p>
 */
public static DocList doSimpleQuery(String sreq,
                                    SolrIndexSearcher searcher,
                                    IndexSchema schema,
                                    int start, int limit) throws IOException {
  final List<String> parts = StrUtils.splitSmart(sreq, ';');
  final String queryText = parts.isEmpty() ? "" : parts.get(0);
  final Query parsed = QueryParsing.parseQuery(queryText, schema);

  /* a second command, if present, is a sort spec that Lucene can apply */
  Sort order = null;
  int rows = limit;
  if (parts.size() > 1) {
    final QueryParsing.SortSpec spec =
      QueryParsing.parseSort(parts.get(1), schema);
    if (spec != null) {
      order = spec.getSort();
      if (spec.getCount() >= 0) {
        rows = spec.getCount();
      }
    }
  }
  return searcher.getDocList(parsed, (DocSet)null, order, start, rows);
}
/**
 * Given a string containing fieldNames and boost info,
 * converts it to a Map from field name to boost info.
 *
 * <p>
 * The returned Map iterates in the order the names appear in the input.
 * </p>
 * <p>
 * Doesn't care if boost info is negative, you're on your own.
 * </p>
 * <p>
 * Doesn't care if boost info is missing, again: you're on your own.
 * </p>
 * <p>
 * Items without a field name (such as "^" or "^2.0") are ignored.
 * </p>
 *
 * @param in a String like "fieldOne^2.3 fieldTwo fieldThree^-0.4",
 *           may be null or blank
 * @return Map of fieldOne =&gt; 2.3, fieldTwo =&gt; null, fieldThree =&gt; -0.4;
 *         an empty (mutable) Map if the input is null or blank
 * @throws NumberFormatException if a boost is not a valid float
 */
public static Map<String,Float> parseFieldBoosts(String in) {
  /* insertion ordered, so the configured order survives the parsing */
  Map<String,Float> out = new java.util.LinkedHashMap<String,Float>();
  if (null == in) {
    return out;
  }
  String trimmed = in.trim();
  if (0 == trimmed.length()) {
    return out;
  }
  for (String item : trimmed.split("\\s+")) {
    String[] parts = item.split("\\^");
    /* "^" splits into nothing at all, "^2" into an empty name + boost */
    if (0 == parts.length || 0 == parts[0].length()) {
      continue;
    }
    out.put(parts[0], 1 == parts.length ? null : Float.valueOf(parts[1]));
  }
  return out;
}
/**
 * Parses a string of functions with optional boosts into a List of
 * Queries, one per function, each carrying its boost when one was
 * specified.
 * <p>
 * NOTE: intra-function whitespace is not allowed.
 * </p>
 * @see #parseFieldBoosts
 */
public static List<Query> parseFuncs(IndexSchema s, String in)
  throws ParseException {
  final Map<String,Float> boostsByFunc = parseFieldBoosts(in);
  final List<Query> parsed = new ArrayList<Query>(boostsByFunc.size());
  for (Map.Entry<String,Float> item : boostsByFunc.entrySet()) {
    final Query func = QueryParsing.parseFunction(item.getKey(), s);
    final Float boost = item.getValue();
    if (boost != null) {
      func.setBoost(boost);
    }
    parsed.add(func);
  }
  return parsed;
}
/**
 * Counts the optional (SHOULD) clauses of the query and uses the
 * specification string to work out how many of them must match.
 *
 * <p>
 * Details about the specification format can be found
 * <a href="doc-files/min-should-match.html">here</a>
 * </p>
 *
 * <p>A few important notes...</p>
 * <ul>
 * <li>
 * If the calculation determines that no optional clauses are needed,
 * BooleanQuery.setMinimumNumberShouldMatch is not called at all, but
 * the usual rules about BooleanQueries still apply at search time
 * (a BooleanQuery containing no required clauses must still match at
 * least one optional clause)
 * </li>
 * <li>
 * No matter what number the calculation arrives at,
 * BooleanQuery.setMinimumNumberShouldMatch is never called with a
 * value greater than the number of optional clauses (or less than 1)
 * </li>
 * </ul>
 *
 * <p>:TODO: should optimize the case where number is same
 * as clauses to just make them all "required"
 * </p>
 */
public static void setMinShouldMatch(BooleanQuery q, String spec) {
  final BooleanClause[] clauses = q.getClauses();
  int optional = 0;
  for (int i = 0; i < clauses.length; i++) {
    if (Occur.SHOULD == clauses[i].getOccur()) {
      optional++;
    }
  }
  final int needed = calculateMinShouldMatch(optional, spec);
  if (needed > 0) {
    q.setMinimumNumberShouldMatch(needed);
  }
}
/**
 * Helper exposed for UnitTests: computes how many optional clauses
 * must match according to the specification.
 *
 * <p>
 * A spec is either a simple expression, or a whitespace separated list
 * of conditional expressions of the form "N&lt;expr". A simple
 * expression is an integer or a percentage; a negative value is
 * subtracted from the number of optional clauses. For conditional
 * specs, the expression of the last condition whose N is smaller than
 * the number of optional clauses is used; if there is none, all
 * optional clauses are required.
 * </p>
 *
 * <p>
 * Leading/trailing whitespace, and any amount of whitespace (including
 * tabs and newlines) between conditional expressions, is tolerated --
 * values coming from an XML config frequently contain it.
 * </p>
 *
 * @param optionalClauseCount the number of optional clauses
 * @param spec the specification, must not be null
 * @return a value between 0 and optionalClauseCount for simple specs
 * @throws NumberFormatException if the spec is malformed
 * @see #setMinShouldMatch
 */
static int calculateMinShouldMatch(int optionalClauseCount, String spec) {
  final String trimmed = spec.trim();
  int result = optionalClauseCount;

  if (-1 < trimmed.indexOf("<")) {
    /* we have conditional spec(s) */
    for (String s : trimmed.split("\\s+")) {
      String[] parts = s.split("<");
      int upperBound = Integer.parseInt(parts[0]);
      if (optionalClauseCount <= upperBound) {
        /* conditions are in ascending order: the previous one wins */
        return result;
      }
      result = calculateMinShouldMatch(optionalClauseCount, parts[1]);
    }
    return result;
  }

  /* otherwise, simple expression */
  if (-1 < trimmed.indexOf("%")) {
    /* percentage, always rounded towards zero */
    int percent = Integer.parseInt(trimmed.replace("%", ""));
    float calc = (result * percent) / 100f;
    result = calc < 0 ? result + (int)calc : (int)calc;
  } else {
    int calc = Integer.parseInt(trimmed);
    result = calc < 0 ? result + calc : calc;
  }

  /* clamp into [0, optionalClauseCount] */
  return (optionalClauseCount < result ?
          optionalClauseCount : (result < 0 ? 0 : result));
}
/**
 * Recursively copies the clauses of the "from" query into the "to"
 * query, descending into optional sub-BooleanQueries instead of
 * copying them as a whole.
 *
 * <p>
 * The boost of "from" is multiplied into each of its sub-queries.
 * Sub-BooleanQueries which are required or prohibited are not
 * flattened. "from" is modified during the walk (the boosts of its
 * sub-queries change), so do not attempt to reuse it.
 * </p>
 */
public static void flatenBooleanQuery(BooleanQuery to, BooleanQuery from) {
  for (BooleanClause clause : from.getClauses()) {
    final Query sub = clause.getQuery();
    sub.setBoost(sub.getBoost() * from.getBoost());

    final boolean optional =
      !(clause.isRequired() || clause.isProhibited());
    if (optional && sub instanceof BooleanQuery) {
      /* we can recurse */
      flatenBooleanQuery(to, (BooleanQuery)sub);
    } else {
      to.add(clause);
    }
  }
}
/**
 * Puts a backslash in front of every query parser special character,
 * with the exception of '"', '-', and '+' which are left alone.
 *
 * @see QueryParser#escape
 */
public static CharSequence partialEscape(CharSequence s) {
  /* the characters that get a backslash put in front of them */
  final String special = "\\!():^[]{}~*?";
  final StringBuffer escaped = new StringBuffer();
  for (int pos = 0; pos < s.length(); pos++) {
    final char ch = s.charAt(pos);
    if (special.indexOf(ch) >= 0) {
      escaped.append('\\');
    }
    escaped.append(ch);
  }
  return escaped;
}
/**
 * Leaves input with an even (ie: balanced) number of '"' characters
 * alone, returning it as is -- for an odd number, a String with every
 * '"' character removed is returned instead.
 */
public static CharSequence stripUnbalancedQuotes(CharSequence s) {
  boolean balanced = true;
  for (int pos = 0; pos < s.length(); pos++) {
    if ('"' == s.charAt(pos)) {
      /* each quote flips between "open" and "closed" */
      balanced = !balanced;
    }
  }
  if (balanced) {
    return s;
  }
  return s.toString().replace("\"","");
}
/**
 * A collection of common params, both for Plugin initialization and
 * for Requests: the param names (static) and the default value of each
 * param (per instance).
 */
public static class CommonParams {

  /** query and init param for tiebreaker value */
  public static String TIE = "tie";
  /** query and init param for query fields */
  public static String QF = "qf";
  /** query and init param for phrase boost fields */
  public static String PF = "pf";
  /** query and init param for MinShouldMatch specification */
  public static String MM = "mm";
  /** query and init param for Phrase Slop value */
  public static String PS = "ps";
  /** query and init param for boosting query */
  public static String BQ = "bq";
  /** query and init param for boosting functions */
  public static String BF = "bf";
  /** query and init param for filtering query */
  public static String FQ = "fq";
  /** query and init param for field list */
  public static String FL = "fl";
  /** the "gen" param name (not read by {@link #setValues}) */
  public static String GEN = "gen";

  /** the default tie breaker to use in DisjunctionMaxQueries */
  public float tiebreaker = 0.0f;
  /** the default query fields to be used */
  public String qf = null;
  /** the default phrase boosting fields to be used */
  public String pf = null;
  /** the default min should match to be used */
  public String mm = "100%";
  /** the default phrase slop to be used */
  public int pslop = 0;
  /** the default boosting query to be used */
  public String bq = null;
  /** the default boosting functions to be used */
  public String bf = null;
  /** the default filtering query to be used */
  public String fq = null;
  /** the default field list to be used */
  public String fl = null;

  /** Creates an instance holding the built in defaults. */
  public CommonParams() {
    /* :NOOP: */
  }

  /**
   * Creates an instance from the built in defaults plus the args.
   * @see #setValues
   */
  public CommonParams(NamedList args) {
    this();
    setValues(args);
  }

  /**
   * Picks the new value for a "str" param: the value found in the
   * args when it is a String, otherwise the current value (logging a
   * severe error if something of another type was found).
   */
  private static String strParam(NamedList args, String name,
                                 String current) {
    final Object found = args.get(name);
    if (found == null) {
      return current;
    }
    if (found instanceof String) {
      return found.toString();
    }
    SolrCore.log.severe("init param is not a str: " + name);
    return current;
  }

  /**
   * Sets the params using values from a NamedList, useful in the
   * init method for your handler.
   *
   * <p>
   * If any param is not of the expected type, a severe error is
   * logged, and the param is skipped.
   * </p>
   *
   * <p>
   * If any param is not in the NamedList, it is skipped and the
   * old value is left alone.
   * </p>
   */
  public void setValues(NamedList args) {
    final Object tie = args.get(TIE);
    if (tie instanceof Float) {
      tiebreaker = ((Float)tie).floatValue();
    } else if (tie != null) {
      SolrCore.log.severe("init param is not a float: " + TIE);
    }

    qf = strParam(args, QF, qf);
    pf = strParam(args, PF, pf);
    mm = strParam(args, MM, mm);

    final Object slop = args.get(PS);
    if (slop instanceof Integer) {
      pslop = ((Integer)slop).intValue();
    } else if (slop != null) {
      SolrCore.log.severe("init param is not an int: " + PS);
    }

    bq = strParam(args, BQ, bq);
    bf = strParam(args, BF, bf);
    fq = strParam(args, FQ, fq);
    fl = strParam(args, FL, fl);
  }
}
/**
 * A SolrQueryParser which can treat a field name as an alias for a set
 * of (boosted) fields, building a DisjunctionMaxQuery across them.
 */
public static class DisjunctionMaxQueryParser extends SolrQueryParser {

  /** A simple container for the info stored per alias. */
  protected static class Alias {
    /** the tiebreaker for the DisjunctionMaxQuery */
    public float tie;
    /** field name =&gt; boost (null when no boost was specified) */
    public Map<String,Float> fields;
  }

  /**
   * Maps a field name we expect to see in our query string to the
   * Alias holding the fields to use in our DisjunctionMaxQuery and
   * the tiebreaker to use.
   */
  protected Map<String,Alias> aliases = new HashMap<String,Alias>(3);

  /** Creates a parser without aliases for the schema and default field. */
  public DisjunctionMaxQueryParser(IndexSchema s, String defaultField) {
    super(s,defaultField);
  }

  /** Creates a parser without aliases and with a null default field. */
  public DisjunctionMaxQueryParser(IndexSchema s) {
    this(s,null);
  }

  /**
   * Registers an alias with this query parser, replacing any alias
   * registered earlier for the same field name.
   *
   * @param field the field name that should trigger alias mapping
   * @param tiebreaker the tiebreaker to be used in the
   *                   DisjunctionMaxQuery
   * @param fieldBoosts the mapping from fieldname to boost value that
   *                    should be used to build up the clauses of the
   *                    DisjunctionMaxQuery.
   * @see SolrPluginUtils#parseFieldBoosts
   */
  public void addAlias(String field, float tiebreaker,
                       Map<String,Float> fieldBoosts) {
    final Alias entry = new Alias();
    entry.fields = fieldBoosts;
    entry.tie = tiebreaker;
    aliases.put(field, entry);
  }

  /**
   * For a field which is not an alias, this simply delegates to the
   * super class. For an alias, it recurses on each of the aliased
   * fields and combines the resulting queries into one
   * DisjunctionMaxQuery (so yes: aliases which point at other aliases
   * should work).
   *
   * @return the query, or null if an alias produced no sub-query at all
   */
  protected Query getFieldQuery(String field, String queryText)
    throws ParseException {
    if (!aliases.containsKey(field)) {
      return super.getFieldQuery(field, queryText);
    }
    final Alias alias = aliases.get(field);
    final DisjunctionMaxQuery dismax = new DisjunctionMaxQuery(alias.tie);

    /* delegation may not give us a single valid query */
    int added = 0;
    for (Map.Entry<String,Float> target : alias.fields.entrySet()) {
      final Query sub = getFieldQuery(target.getKey(), queryText);
      if (sub == null) {
        continue;
      }
      final Float boost = target.getValue();
      if (boost != null) {
        sub.setBoost(boost);
      }
      dismax.add(sub);
      added++;
    }
    return added > 0 ? dismax : null;
  }
}
/**
 * Builds the Sort described by the "sort" request parameter.
 *
 * <p>
 * A sort string that fails to parse, or that parses to no SortSpec,
 * is logged at WARNING level and otherwise ignored.
 * </p>
 *
 * @param req the request whose "sort" parameter and schema are used
 * @return the requested Sort; null if the parameter is absent, empty,
 *         or unusable
 */
public static Sort getSort(SolrQueryRequest req) {
  String sortParam = req.getParam("sort");
  if (null == sortParam || 0 == sortParam.length()) {
    return null;
  }

  QueryParsing.SortSpec spec = null;
  SolrException failure = null;
  try {
    spec = QueryParsing.parseSort(sortParam, req.getSchema());
  } catch (SolrException e) {
    failure = e;
  }

  if (null != spec && null == failure) {
    return spec.getSort();
  }

  /* the user clearly asked for some sort, yet nothing usable came
   * out of parsing it
   */
  SolrCore.log.log(Level.WARNING,"Invalid sort \""+sortParam+"\" was specified, ignoring", failure);
  return null;
}
/**
 * CacheRegenerator for caches whose entries do not depend on the
 * searcher that was current when they were created.
 *
 * <p>
 * Every oldKey=&gt;oldVal pair is placed in the new cache unchanged.
 * </p>
 */
public static class IdentityRegenerator implements CacheRegenerator {

  /**
   * Puts oldVal into newCache under oldKey; newSearcher and oldCache
   * are not consulted.
   *
   * @return always true
   */
  public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache,
                                SolrCache oldCache, Object oldKey, Object oldVal)
    throws IOException {

    newCache.put(oldKey, oldVal);
    return true;
  }
}
}

View File

@ -0,0 +1,110 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title>Min Number Should Match Specification Format</title>
</head>
<body>
<h1>Min Number Should Match Specification Format</h1>
<blockquote>
This document explains the format used for specifying the
"Min Number Should Match" criteria of the BooleanQuery objects built by the
DisMaxRequestHandler.
</blockquote>
<h2>Explanation of Concept: "Min Number Should Match"</h2>
<div>
:TODO:
</div>
<h2>Specification Format</h2>
<div>
<p>Specification strings may have the following formats...</p>
<dl>
<dt><code>3</code></dt>
<dd>A positive integer, indicating a fixed value regardless of the
number of optional clauses.
</dd>
<dt><code>-2</code></dt>
<dd>A negative integer, indicating that the total number of optional clauses,
minus this number should be mandatory.
</dd>
<dt><code>75%</code></dt>
<dd>A percentage, indicating that this percent of the total number of
optional clauses are necessary. The number computed from the
percentage is rounded down and used as the minimum.
</dd>
<dt><code>-25%</code></dt>
<dd>A negative percentage, indicating that this percent of the total
number of optional clauses can be missing. The number computed from the
percentage is rounded down, before being subtracted from the total
to determine the minimum.
</dd>
<dt><code>3&lt;90%</code></dt>
<dd>A positive integer, followed by the less-than symbol, followed
by any of the previously mentioned specifiers is a conditional
specification. It indicates that if the number of optional clauses is
equal to (or less than) the integer, they are all required, but
if it's greater than the integer, the specification applies.
In this example: if there are 1 to 3 clauses they are all required,
but for 4 or more clauses only 90% are required.
</dd>
<dt><code>2&lt;-25% 9&lt;-3</code></dt>
<dd>Multiple conditional specifications can be separated by spaces,
each one only being valid for numbers greater than the one before it.
In this example: if there are 1 or 2 clauses both are required,
if there are 3-9 clauses all but 25% are required, and if there
are more than 9 clauses, all but three are required.
</dd>
</dl>
<p>
A few important notes...
</p>
<ul>
<li>
When dealing with percentages, negative values can be used to get
different behavior in edge cases. 75% and -25% mean the same thing
when dealing with 4 clauses, but when dealing with 5 clauses 75% means
3 are required, but -25% means 4 are required.
</li>
<li>
If the calculations based on the specification determine that no
optional clauses are needed, the usual rules about BooleanQueries
still apply at search time (a BooleanQuery containing no required
clauses must still match at least one optional clause)
</li>
<li>
No matter what number the calculation arrives at,
a value greater than the number of optional clauses, or a value less than
1 will never be used. (ie: no matter how low or how high the result of the
calculation is, the minimum number of required matches will never
be lower than 1 or greater than the number of clauses.)
</li>
</ul>
</div>
<hr>
<pre>
$Id:$
$Source:$
</pre>
</body> </html>

View File

@ -0,0 +1,103 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr;
import org.apache.solr.request.*;
import org.apache.solr.util.*;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import java.io.IOException;
import java.io.StringWriter;
import java.io.ByteArrayInputStream;
import java.io.UnsupportedEncodingException;
import java.util.Map;
import java.util.HashMap;
/**
 * Exercises some basic behavior of the DisMaxRequestHandler: a handful of
 * documents is indexed and free-text queries are checked for hit counts
 * and result ordering.
 */
public class DisMaxRequestHandlerTest extends AbstractSolrTestCase {

  public String getSchemaFile() {
    return "schema.xml";
  }

  public String getSolrConfigFile() {
    return "solrconfig.xml";
  }

  /**
   * Replaces the request factory with one obtained for "dismax".
   * NOTE(review): the 0 and 20 arguments are presumably start and limit;
   * confirm against the test harness.
   */
  public void setUp() throws Exception {
    super.setUp();
    lrf = h.getRequestFactory("dismax", 0, 20, "version", "2.0");
  }

  /** Indexes the sample documents, then runs the query checks. */
  public void testSomeStuff() throws Exception {
    indexSampleDocs();

    assertQ("basic match",
            req("guide"),
            "//*[@numFound='2']");

    assertQ("basic cross field matching, boost on same field matching",
            req("cool stuff"),
            "//*[@numFound='3']",
            "//result/doc[1]/int[@name='id'][.='42']",
            "//result/doc[2]/int[@name='id'][.='666']",
            "//result/doc[3]/int[@name='id'][.='8675309']");

    assertQ("minimum mm is three",
            req("cool stuff traveling"),
            "//*[@numFound='2']",
            "//result/doc[1]/int[@name='id'][. ='42']",
            "//result/doc[2]/int[@name='id'][. ='666']");

    assertQ("at 4 mm allows one missing ",
            req("cool stuff traveling jenny"),
            "//*[@numFound='3']");
  }

  /** Adds the four documents every query in this test runs against, then commits. */
  private void indexSampleDocs() throws Exception {
    assertU(adoc("id", "666",
                 "features_t", "cool and scary stuff",
                 "subject", "traveling in hell",
                 "title", "The Omen",
                 "weight", "87.9",
                 "iind", "666"));
    assertU(adoc("id", "42",
                 "features_t", "cool stuff",
                 "subject", "traveling the galaxy",
                 "title", "Hitch Hiker's Guide to the Galaxy",
                 "weight", "99.45",
                 "iind", "42"));
    assertU(adoc("id", "1",
                 "features_t", "nothing",
                 "subject", "garbage",
                 "title", "Most Boring Guide Ever",
                 "weight", "77",
                 "iind", "4"));
    assertU(adoc("id", "8675309",
                 "features_t", "Wikedly memorable chorus and stuff",
                 "subject", "One Cool Hot Chick",
                 "title", "Jenny",
                 "weight", "97.3",
                 "iind", "8675309"));
    assertU(commit());
  }
}

View File

@ -0,0 +1,329 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util;
import org.apache.solr.search.SolrQueryParser;
import org.apache.solr.util.NamedList;
import org.apache.solr.util.SolrPluginUtils;
import org.apache.solr.util.SolrPluginUtils.DisjunctionMaxQueryParser;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.xmlpull.v1.XmlPullParserFactory;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import java.io.File;
import java.math.BigDecimal;
import java.util.Random;
import java.util.Date;
import java.util.List;
import java.util.Arrays;
import java.util.Map;
import java.util.HashMap;
import java.util.Iterator;
/**
 * Tests that the functions in SolrPluginUtils work as advertised.
 *
 * <p>
 * Covers partialEscape, stripUnbalancedQuotes, parseFieldBoosts, the
 * DisjunctionMaxQueryParser, and the "min should match" calculation.
 * </p>
 */
public class SolrPluginUtilsTest extends AbstractSolrTestCase {
public String getSchemaFile() { return "schema.xml"; }
public String getSolrConfigFile() { return "solrconfig.xml"; }
/**
 * partialEscape: per the expectations below, ':', '!' and '?' come back
 * backslash-escaped, while a leading '+' and double quotes are left as is.
 */
public void testPartialEscape() {
assertEquals("",pe(""));
assertEquals("foo",pe("foo"));
assertEquals("foo\\:bar",pe("foo:bar"));
assertEquals("+foo\\:bar",pe("+foo:bar"));
assertEquals("foo \\! bar",pe("foo ! bar"));
assertEquals("foo\\?",pe("foo?"));
assertEquals("foo \"bar\"",pe("foo \"bar\""));
assertEquals("foo\\! \"bar\"",pe("foo! \"bar\""));
}
/**
 * stripUnbalancedQuotes: input with balanced (or no) double quotes is
 * expected back unchanged; the lone trailing quote in "42&quot;" is removed.
 */
public void testStripUnbalancedQuotes() {
assertEquals("",strip(""));
assertEquals("foo",strip("foo"));
assertEquals("foo \"bar\"",strip("foo \"bar\""));
assertEquals("42",strip("42\""));
assertEquals("\"how now brown cow?\"",strip("\"how now brown cow?\""));
assertEquals("\"you go\" \"now!\"",strip("\"you go\" \"now!\""));
}
/**
 * parseFieldBoosts: "field^boost" tokens map field to boost, a bare field
 * name maps to null, surrounding/extra whitespace (spaces, tabs, newlines)
 * is tolerated, and blank input gives an empty map.
 */
public void testParseFieldBoosts() throws Exception {
/* expected result for every variation of the three-field input */
Map<String,Float> e1 = new HashMap<String,Float>();
e1.put("fieldOne",2.3f);
e1.put("fieldTwo",null);
e1.put("fieldThree",-0.4f);
assertEquals("basic e1", e1, SolrPluginUtils.parseFieldBoosts
("fieldOne^2.3 fieldTwo fieldThree^-0.4"));
assertEquals("spacey e1", e1, SolrPluginUtils.parseFieldBoosts
(" fieldOne^2.3 fieldTwo fieldThree^-0.4 "));
assertEquals("really spacey e1", e1, SolrPluginUtils.parseFieldBoosts
(" \t fieldOne^2.3 \n fieldTwo fieldThree^-0.4 "));
/* expected result for input containing no fields at all */
Map<String,Float> e2 = new HashMap<String,Float>();
assertEquals("empty e2", e2, SolrPluginUtils.parseFieldBoosts
(""));
assertEquals("spacey e2", e2, SolrPluginUtils.parseFieldBoosts
(" \t "));
}
/**
 * DisjunctionMaxQueryParser: queries on non-aliased fields must parse as
 * they would without aliasing, while queries on an aliased field must
 * come back as a DisjunctionMaxQuery with one clause per aliased field
 * that produced a query.
 */
public void testDisjunctionMaxQueryParser() throws Exception {
Query out;
String t;
DisjunctionMaxQueryParser qp =
new SolrPluginUtils.DisjunctionMaxQueryParser(h.getCore().getSchema());
/* three aliases: "hoss" (4 fields), "test" (1 field), and "unused"
 * (2 fields, never queried by name below)
 */
qp.addAlias("hoss", 0.01f, SolrPluginUtils.parseFieldBoosts
("title^2.0 title_stemmed name^1.2 subject^0.5"));
qp.addAlias("test", 0.01f, SolrPluginUtils.parseFieldBoosts("text^2.0"));
qp.addAlias("unused", 1.0f, SolrPluginUtils.parseFieldBoosts
("subject^0.5 sind^1.5"));
/* first some sanity tests that don't use aliasing at all */
/* no field given: the schema's default search field is expected */
t = "XXXXXXXX";
out = qp.parse(t);
assertNotNull(t+" sanity test gave back null", out);
assertTrue(t+" sanity test isn't TermQuery: " + out.getClass(),
out instanceof TermQuery);
assertEquals(t+" sanity test is wrong field",
h.getCore().getSchema().getDefaultSearchFieldName(),
((TermQuery)out).getTerm().field());
/* "subject" and "sind" appear inside the "unused" alias, but querying
 * them directly must not trigger any aliasing
 */
t = "subject:XXXXXXXX";
out = qp.parse(t);
assertNotNull(t+" sanity test gave back null", out);
assertTrue(t+" sanity test isn't TermQuery: " + out.getClass(),
out instanceof TermQuery);
assertEquals(t+" sanity test is wrong field", "subject",
((TermQuery)out).getTerm().field());
/* field has untokenized type, so this should be a term anyway */
t = "sind:\"simple phrase\"";
out = qp.parse(t);
assertNotNull(t+" sanity test gave back null", out);
assertTrue(t+" sanity test isn't TermQuery: " + out.getClass(),
out instanceof TermQuery);
assertEquals(t+" sanity test is wrong field", "sind",
((TermQuery)out).getTerm().field());
t = "subject:\"simple phrase\"";
out = qp.parse(t);
assertNotNull(t+" sanity test gave back null", out);
assertTrue(t+" sanity test isn't PhraseQuery: " + out.getClass(),
out instanceof PhraseQuery);
assertEquals(t+" sanity test is wrong field", "subject",
((PhraseQuery)out).getTerms()[0].field());
/* now some tests that use aliasing */
/* basic usage of single "term" */
t = "hoss:XXXXXXXX";
out = qp.parse(t);
assertNotNull(t+" was null", out);
assertTrue(t+" wasn't a DMQ:" + out.getClass(),
out instanceof DisjunctionMaxQuery);
assertEquals(t+" wrong number of clauses", 4,
countItems(((DisjunctionMaxQuery)out).iterator()));
/* odd case, but should still work, DMQ of one clause */
t = "test:YYYYY";
out = qp.parse(t);
assertNotNull(t+" was null", out);
assertTrue(t+" wasn't a DMQ:" + out.getClass(),
out instanceof DisjunctionMaxQuery);
assertEquals(t+" wrong number of clauses", 1,
countItems(((DisjunctionMaxQuery)out).iterator()));
/* basic usage of multiple "terms" */
t = "hoss:XXXXXXXX test:YYYYY";
out = qp.parse(t);
assertNotNull(t+" was null", out);
assertTrue(t+" wasn't a boolean:" + out.getClass(),
out instanceof BooleanQuery);
{
/* one boolean clause per aliased term, each of them a DMQ */
BooleanQuery bq = (BooleanQuery)out;
assertEquals(t+" wrong number of clauses", 2,
bq.getClauses().length);
Query sub = bq.getClauses()[0].getQuery();
assertTrue(t+" first wasn't a DMQ:" + sub.getClass(),
sub instanceof DisjunctionMaxQuery);
assertEquals(t+" first had wrong number of clauses", 4,
countItems(((DisjunctionMaxQuery)sub).iterator()));
sub = bq.getClauses()[1].getQuery();
assertTrue(t+" second wasn't a DMQ:" + sub.getClass(),
sub instanceof DisjunctionMaxQuery);
assertEquals(t+" second had wrong number of clauses", 1,
countItems(((DisjunctionMaxQuery)sub).iterator()));
}
/* a phrase, and a term that is a stop word for some fields */
t = "hoss:\"XXXXXX YYYYY\" hoss:the";
out = qp.parse(t);
assertNotNull(t+" was null", out);
assertTrue(t+" wasn't a boolean:" + out.getClass(),
out instanceof BooleanQuery);
{
BooleanQuery bq = (BooleanQuery)out;
assertEquals(t+" wrong number of clauses", 2,
bq.getClauses().length);
Query sub = bq.getClauses()[0].getQuery();
assertTrue(t+" first wasn't a DMQ:" + sub.getClass(),
sub instanceof DisjunctionMaxQuery);
assertEquals(t+" first had wrong number of clauses", 4,
countItems(((DisjunctionMaxQuery)sub).iterator()));
sub = bq.getClauses()[1].getQuery();
assertTrue(t+" second wasn't a DMQ:" + sub.getClass(),
sub instanceof DisjunctionMaxQuery);
/* only 2 of the 4 "hoss" fields are expected to yield a clause for "the" */
assertEquals(t+" second had wrong number of clauses (stop words)", 2,
countItems(((DisjunctionMaxQuery)sub).iterator()));
}
}
/** Exhausts the iterator and returns how many items it produced. */
private static int countItems(Iterator i) {
int count = 0;
while (i.hasNext()) {
count++;
i.next();
}
return count;
}
/**
 * calculateMinShouldMatch / setMinShouldMatch: checks plain integers,
 * percentages, negative forms, and "N&lt;spec" conditionals against
 * hand-computed expectations.
 */
public void testMinShouldMatchCalculator() {
/* zero is zero is zero */
assertEquals(0, calcMSM(5, "0"));
assertEquals(0, calcMSM(5, "0%"));
assertEquals(0, calcMSM(5, "-5"));
assertEquals(0, calcMSM(5, "-100%"));
/* basic integers */
/* (expected results stay within 0..clauses: "5" of 3 gives 3, "-5" of 3 gives 0) */
assertEquals(3, calcMSM(5, "3"));
assertEquals(2, calcMSM(5, "-3"));
assertEquals(3, calcMSM(3, "3"));
assertEquals(0, calcMSM(3, "-3"));
assertEquals(3, calcMSM(3, "5"));
assertEquals(0, calcMSM(3, "-5"));
/* positive percentages with rounding */
/* (the percentage of the clause count is rounded down) */
assertEquals(0, calcMSM(3, "25%"));
assertEquals(1, calcMSM(4, "25%"));
assertEquals(1, calcMSM(5, "25%"));
assertEquals(2, calcMSM(10, "25%"));
/* negative percentages with rounding */
/* (the rounded-down percentage is subtracted from the clause count) */
assertEquals(3, calcMSM(3, "-25%"));
assertEquals(3, calcMSM(4, "-25%"));
assertEquals(4, calcMSM(5, "-25%"));
assertEquals(8, calcMSM(10, "-25%"));
/* conditional */
/* (up to and including 3 clauses everything is required; above 3 the
 * spec after the '<' applies)
 */
assertEquals(1, calcMSM(1, "3<0"));
assertEquals(2, calcMSM(2, "3<0"));
assertEquals(3, calcMSM(3, "3<0"));
assertEquals(0, calcMSM(4, "3<0"));
assertEquals(0, calcMSM(5, "3<0"));
assertEquals(1, calcMSM(1, "3<25%"));
assertEquals(2, calcMSM(2, "3<25%"));
assertEquals(3, calcMSM(3, "3<25%"));
assertEquals(1, calcMSM(4, "3<25%"));
assertEquals(1, calcMSM(5, "3<25%"));
/* multiple conditionals */
/* (1-3 clauses: all required; 4-10: all but 25%; 11 and up: all but 3) */
assertEquals(1, calcMSM(1, "3<-25% 10<-3"));
assertEquals(2, calcMSM(2, "3<-25% 10<-3"));
assertEquals(3, calcMSM(3, "3<-25% 10<-3"));
assertEquals(3, calcMSM(4, "3<-25% 10<-3"));
assertEquals(4, calcMSM(5, "3<-25% 10<-3"));
assertEquals(5, calcMSM(6, "3<-25% 10<-3"));
assertEquals(6, calcMSM(7, "3<-25% 10<-3"));
assertEquals(6, calcMSM(8, "3<-25% 10<-3"));
assertEquals(7, calcMSM(9, "3<-25% 10<-3"));
assertEquals(8, calcMSM(10, "3<-25% 10<-3"));
assertEquals(8, calcMSM(11, "3<-25% 10<-3"));
assertEquals(9, calcMSM(12, "3<-25% 10<-3"));
assertEquals(97, calcMSM(100, "3<-25% 10<-3"));
/* now apply specs to a real BooleanQuery with four SHOULD clauses;
 * the same query object is reused, so the order of the calls matters
 */
BooleanQuery q = new BooleanQuery();
q.add(new TermQuery(new Term("a","b")), Occur.SHOULD);
q.add(new TermQuery(new Term("a","c")), Occur.SHOULD);
q.add(new TermQuery(new Term("a","d")), Occur.SHOULD);
q.add(new TermQuery(new Term("a","d")), Occur.SHOULD);
SolrPluginUtils.setMinShouldMatch(q, "0");
assertEquals(0, q.getMinimumNumberShouldMatch());
SolrPluginUtils.setMinShouldMatch(q, "1");
assertEquals(1, q.getMinimumNumberShouldMatch());
SolrPluginUtils.setMinShouldMatch(q, "50%");
assertEquals(2, q.getMinimumNumberShouldMatch());
SolrPluginUtils.setMinShouldMatch(q, "99");
assertEquals(4, q.getMinimumNumberShouldMatch());
/* adding MUST clauses is expected to leave the 50% result at 2, i.e.
 * the percentage is taken of the four SHOULD clauses only
 */
q.add(new TermQuery(new Term("a","e")), Occur.MUST);
q.add(new TermQuery(new Term("a","f")), Occur.MUST);
SolrPluginUtils.setMinShouldMatch(q, "50%");
assertEquals(2, q.getMinimumNumberShouldMatch());
}
/** macro: shorthand for SolrPluginUtils.partialEscape, as a String */
public String pe(CharSequence s) {
return SolrPluginUtils.partialEscape(s).toString();
}
/** macro: shorthand for SolrPluginUtils.stripUnbalancedQuotes, as a String */
public String strip(CharSequence s) {
return SolrPluginUtils.stripUnbalancedQuotes(s).toString();
}
/** macro: shorthand for SolrPluginUtils.calculateMinShouldMatch */
public int calcMSM(int clauses, String spec) {
return SolrPluginUtils.calculateMinShouldMatch(clauses, spec);
}
}

View File

@ -166,7 +166,23 @@
The "standard" request handler is the default and will be used if qt The "standard" request handler is the default and will be used if qt
is not specified in the request. is not specified in the request.
--> -->
<requestHandler name="standard" class="solr.StandardRequestHandler" /> <requestHandler name="standard" class="solr.StandardRequestHandler"/>
<requestHandler name="dismax" class="solr.DisMaxRequestHandler" >
<float name="tie">0.01</float>
<str name="qf">
text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0
</str>
<str name="pf">
text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5
</str>
<str name="bf">
ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3
</str>
<str name="mm">
3&lt;-1 5&lt;-2 6&lt;90%
</str>
<int name="ps">100</int>
</requestHandler>
<requestHandler name="old" class="solr.tst.OldRequestHandler" > <requestHandler name="old" class="solr.tst.OldRequestHandler" >
<int name="myparam">1000</int> <int name="myparam">1000</int>
<float name="ratio">1.4142135</float> <float name="ratio">1.4142135</float>