mirror of https://github.com/apache/lucene.git
new DisMaxRequestHandler as well as some generic SolrPluginUtils
git-svn-id: https://svn.apache.org/repos/asf/incubator/solr/trunk@408103 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
375d553731
commit
d63338bd35
|
@ -180,14 +180,43 @@
|
||||||
-->
|
-->
|
||||||
<requestHandler name="standard" class="solr.StandardRequestHandler" />
|
<requestHandler name="standard" class="solr.StandardRequestHandler" />
|
||||||
|
|
||||||
<!-- example of a request handler with custom parameters passed to it's init()
|
<!-- DisMaxRequestHandler is an example of a request handler that
|
||||||
<requestHandler name="example" class="myorg.mypkg.MyRequestHandler" >
|
supports optional parameters which are passed to
|
||||||
<int name="myparam">1000</int>
|
its init() method.
|
||||||
<float name="ratio">1.4142135</float>
|
-->
|
||||||
<arr name="myarr"><int>1</int><int>2</int></arr>
|
<requestHandler name="dismax" class="solr.DisMaxRequestHandler" >
|
||||||
<str>foo</str>
|
<float name="tie">0.01</float>
|
||||||
|
<str name="qf">
|
||||||
|
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
|
||||||
|
</str>
|
||||||
|
<str name="pf">
|
||||||
|
text^0.2 features^1.1 name^1.5 manu^1.4 manu_exact^1.9
|
||||||
|
</str>
|
||||||
|
<str name="bf">
|
||||||
|
ord(poplarity)^0.5 recip(rord(price),1,1000,1000)^0.3
|
||||||
|
</str>
|
||||||
|
<str name="fl">
|
||||||
|
id,name,price,score
|
||||||
|
</str>
|
||||||
|
<str name="mm">
|
||||||
|
2<-1 5<-2 6<90%
|
||||||
|
</str>
|
||||||
|
<int name="ps">100</int>
|
||||||
|
</requestHandler>
|
||||||
|
<!-- Note how you can register the same handler multiple times with
|
||||||
|
different names (and different init parameters)
|
||||||
|
-->
|
||||||
|
<requestHandler name="instock" class="solr.DisMaxRequestHandler" >
|
||||||
|
<str name="fq">
|
||||||
|
inStock:true
|
||||||
|
</str>
|
||||||
|
<str name="qf">
|
||||||
|
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
|
||||||
|
</str>
|
||||||
|
<str name="mm">
|
||||||
|
2<-1 5<-2 6<90%
|
||||||
|
</str>
|
||||||
</requestHandler>
|
</requestHandler>
|
||||||
-->
|
|
||||||
|
|
||||||
<!-- config for the admin interface -->
|
<!-- config for the admin interface -->
|
||||||
<admin>
|
<admin>
|
||||||
|
|
|
@ -0,0 +1,374 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2006 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.request;
|
||||||
|
|
||||||
|
import org.apache.solr.core.SolrCore;
|
||||||
|
import org.apache.solr.core.SolrInfoMBean;
|
||||||
|
import org.apache.solr.core.SolrException;
|
||||||
|
|
||||||
|
import org.apache.solr.search.SolrIndexSearcher;
|
||||||
|
import org.apache.solr.search.DocIterator;
|
||||||
|
import org.apache.solr.search.DocSet;
|
||||||
|
import org.apache.solr.search.DocList;
|
||||||
|
import org.apache.solr.search.DocListAndSet;
|
||||||
|
import org.apache.solr.search.SolrCache;
|
||||||
|
import org.apache.solr.search.SolrQueryParser;
|
||||||
|
import org.apache.solr.search.QueryParsing;
|
||||||
|
import org.apache.solr.search.CacheRegenerator;
|
||||||
|
|
||||||
|
import org.apache.solr.request.StandardRequestHandler;
|
||||||
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
|
import org.apache.solr.request.SolrQueryResponse;
|
||||||
|
import org.apache.solr.request.SolrRequestHandler;
|
||||||
|
|
||||||
|
import org.apache.solr.schema.IndexSchema;
|
||||||
|
import org.apache.solr.schema.FieldType;
|
||||||
|
|
||||||
|
import org.apache.solr.util.StrUtils;
|
||||||
|
import org.apache.solr.util.NamedList;
|
||||||
|
import org.apache.solr.util.SolrPluginUtils;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
|
import org.apache.lucene.search.ConstantScoreRangeQuery;
|
||||||
|
import org.apache.lucene.search.Sort;
|
||||||
|
import org.apache.lucene.search.Explanation;
|
||||||
|
import org.apache.lucene.queryParser.QueryParser;
|
||||||
|
import org.apache.lucene.queryParser.ParseException;
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
|
||||||
|
import org.xmlpull.v1.XmlPullParserException;
|
||||||
|
|
||||||
|
/* this is the standard logging framework for Solr */
|
||||||
|
import java.util.logging.Logger;
|
||||||
|
import java.util.logging.Level;
|
||||||
|
import java.util.logging.Handler;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.URL;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>
|
||||||
|
* A Generic query plugin designed to be given a simple query expression
|
||||||
|
* from a user, which it will then query against a variety of
|
||||||
|
* pre-configured fields, in a variety of ways, using BooleanQueries,
|
||||||
|
* DisjunctionMaxQueries, and PhraseQueries.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* All of the following options may be configured for this plugin
|
||||||
|
* in the solrconfig as defaults, and may be overridden as request parameters
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <ul>
|
||||||
|
* <li>tie - (Tie breaker) float value to use as tiebreaker in
|
||||||
|
* DisjunctionMaxQueries (should be something much less than 1)
|
||||||
|
* </li>
|
||||||
|
* <li> qf - (Query Fields) fields and boosts to use when building
|
||||||
|
* DisjunctionMaxQueries from the users query. Format is:
|
||||||
|
* "<code>fieldA^1.0 fieldB^2.2</code>".
|
||||||
|
* </li>
|
||||||
|
* <li> mm - (Minimum Match) this supports a wide variety of
|
||||||
|
* complex expressions.
|
||||||
|
* read {@link SolrPluginUtils#setMinShouldMatch SolrPluginUtils.setMinShouldMatch} for full details.
|
||||||
|
* </li>
|
||||||
|
* <li> pf - (Phrase Fields) fields/boosts to make phrase queries out
|
||||||
|
* of to boost
|
||||||
|
* the users query for exact matches on the specified fields.
|
||||||
|
* Format is: "<code>fieldA^1.0 fieldB^2.2</code>".
|
||||||
|
* </li>
|
||||||
|
* <li> ps - (Phrase Slop) amount of slop on phrase queries built for pf
|
||||||
|
* fields.
|
||||||
|
* </li>
|
||||||
|
* <li> bq - (Boost Query) a raw lucene query that will be included in the
|
||||||
|
* user's query to influence the score. If this is a BooleanQuery
|
||||||
|
* with a default boost (1.0f) then the individual clauses will be
|
||||||
|
* added directly to the main query. Otherwise the query will be
|
||||||
|
* included as is.
|
||||||
|
* </li>
|
||||||
|
* <li> bf - (Boost Functions) functions (with optional boosts) that will be
|
||||||
|
* included in the user's query to influence the score.
|
||||||
|
* Format is: "<code>funcA(arg1,arg2)^1.2
|
||||||
|
* funcB(arg3,arg4)^2.2</code>". NOTE: Whitespace is not allowed
|
||||||
|
* in the function arguments.
|
||||||
|
* </li>
|
||||||
|
* <li> fq - (Filter Query) a raw lucene query that can be used
|
||||||
|
* to restrict the super set of products we are interested in - more
|
||||||
|
* efficient than using bq, but doesn't influence score.
|
||||||
|
* </li>
|
||||||
|
* </ul>
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* The following options are only available as request params...
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <ul>
|
||||||
|
* <li> q - (Query) the raw unparsed, unescaped, query from the user.
|
||||||
|
* </li>
|
||||||
|
* <li>sort - (Order By) list of fields and direction to sort on.
|
||||||
|
* </li>
|
||||||
|
* </ul>
|
||||||
|
*/
|
||||||
|
public class DisMaxRequestHandler
|
||||||
|
implements SolrRequestHandler, SolrInfoMBean {
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A field we can't ever find in any schema, so we can safely tell
|
||||||
|
* DisjunctionMaxQueryParser to use it as our defaultField, and
|
||||||
|
* map aliases from it to any field in our schema.
|
||||||
|
*/
|
||||||
|
private static String IMPOSSIBLE_FIELD_NAME = "\uFFFC\uFFFC\uFFFC";
|
||||||
|
|
||||||
|
// statistics
|
||||||
|
// TODO: should we bother synchronizing these, or is an off-by-one error
|
||||||
|
// acceptable every million requests or so?
|
||||||
|
long numRequests;
|
||||||
|
long numErrors;
|
||||||
|
|
||||||
|
/** shorten the class referneces for utilities */
|
||||||
|
private static class U extends SolrPluginUtils {
|
||||||
|
/* :NOOP */
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final U.CommonParams params = new U.CommonParams();
|
||||||
|
|
||||||
|
public DisMaxRequestHandler() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* returns URLs to the Wiki pages */
|
||||||
|
public URL[] getDocs() {
|
||||||
|
/* :TODO: need docs */
|
||||||
|
return new URL[0];
|
||||||
|
}
|
||||||
|
public String getName() {
|
||||||
|
return this.getClass().getName();
|
||||||
|
}
|
||||||
|
|
||||||
|
public NamedList getStatistics() {
|
||||||
|
NamedList lst = new NamedList();
|
||||||
|
lst.add("requests", numRequests);
|
||||||
|
lst.add("errors", numErrors);
|
||||||
|
return lst;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getVersion() {
|
||||||
|
return "$Revision:$";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDescription() {
|
||||||
|
return "DisjunctionMax Request Handler: Does relevancy based queries "
|
||||||
|
+ "accross a variety of fields using configured boosts";
|
||||||
|
}
|
||||||
|
|
||||||
|
public Category getCategory() {
|
||||||
|
return Category.QUERYHANDLER;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSourceId() {
|
||||||
|
return "$Id:$";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSource() {
|
||||||
|
return "$URL:$";
|
||||||
|
}
|
||||||
|
|
||||||
|
/** sets the default variables for any usefull info it finds in the config
|
||||||
|
* if a config option is not inthe format expected, logs an warning
|
||||||
|
* and ignores it..
|
||||||
|
*/
|
||||||
|
public void init(NamedList args) {
|
||||||
|
|
||||||
|
params.setValues(args);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
|
||||||
|
numRequests++;
|
||||||
|
|
||||||
|
try {
|
||||||
|
|
||||||
|
SolrIndexSearcher s = req.getSearcher();
|
||||||
|
IndexSchema schema = req.getSchema();
|
||||||
|
|
||||||
|
Map<String,Float> queryFields =
|
||||||
|
U.parseFieldBoosts(U.getParam(req, params.QF, params.qf));
|
||||||
|
Map<String,Float> phraseFields =
|
||||||
|
U.parseFieldBoosts(U.getParam(req, params.PF, params.pf));
|
||||||
|
|
||||||
|
float tiebreaker = U.getNumberParam
|
||||||
|
(req, params.TIE, params.tiebreaker).floatValue();
|
||||||
|
|
||||||
|
int pslop = U.getNumberParam(req, params.PS, params.pslop).intValue();
|
||||||
|
|
||||||
|
/* a generic parser for parsing regular lucene queries */
|
||||||
|
QueryParser p = new SolrQueryParser(schema, null);
|
||||||
|
|
||||||
|
/* a parser for dealing with user input, which will convert
|
||||||
|
* things to DisjunctionMaxQueries
|
||||||
|
*/
|
||||||
|
U.DisjunctionMaxQueryParser up =
|
||||||
|
new U.DisjunctionMaxQueryParser(schema, IMPOSSIBLE_FIELD_NAME);
|
||||||
|
up.addAlias(IMPOSSIBLE_FIELD_NAME,
|
||||||
|
tiebreaker, queryFields);
|
||||||
|
|
||||||
|
/* for parsing slopy phrases using DisjunctionMaxQueries */
|
||||||
|
U.DisjunctionMaxQueryParser pp =
|
||||||
|
new U.DisjunctionMaxQueryParser(schema, IMPOSSIBLE_FIELD_NAME);
|
||||||
|
pp.addAlias(IMPOSSIBLE_FIELD_NAME,
|
||||||
|
tiebreaker, phraseFields);
|
||||||
|
pp.setPhraseSlop(pslop);
|
||||||
|
|
||||||
|
|
||||||
|
/* * * Main User Query * * */
|
||||||
|
|
||||||
|
String userQuery = U.partialEscape
|
||||||
|
(U.stripUnbalancedQuotes(req.getQueryString())).toString();
|
||||||
|
|
||||||
|
/* the main query we will execute. we disable the coord because
|
||||||
|
* this query is an artificial construct
|
||||||
|
*/
|
||||||
|
BooleanQuery query = new BooleanQuery(true);
|
||||||
|
|
||||||
|
String minShouldMatch = U.getParam(req, params.MM, params.mm);
|
||||||
|
|
||||||
|
Query dis = up.parse(userQuery);
|
||||||
|
|
||||||
|
if (dis instanceof BooleanQuery) {
|
||||||
|
BooleanQuery t = new BooleanQuery();
|
||||||
|
U.flatenBooleanQuery(t, (BooleanQuery)dis);
|
||||||
|
|
||||||
|
U.setMinShouldMatch(t, minShouldMatch);
|
||||||
|
|
||||||
|
query.add(t, Occur.MUST);
|
||||||
|
} else {
|
||||||
|
query.add(dis, Occur.MUST);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* * * Add on Phrases for the Query * * */
|
||||||
|
|
||||||
|
/* build up phrase boosting queries */
|
||||||
|
|
||||||
|
/* if the userQuery already has some quotes, stip them out.
|
||||||
|
* we've already done the phrases they asked for in the main
|
||||||
|
* part of the query, this is to boost docs that may not have
|
||||||
|
* matched those phrases but do match looser phrases.
|
||||||
|
*/
|
||||||
|
String userPhraseQuery = userQuery.replace("\"","");
|
||||||
|
Query phrase = pp.parse("\"" + userPhraseQuery + "\"");
|
||||||
|
if (null != phrase) {
|
||||||
|
query.add(phrase, Occur.SHOULD);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* * * Boosting Query * * */
|
||||||
|
|
||||||
|
String boostQuery = U.getParam(req, params.BQ, params.bq);
|
||||||
|
if (null != boostQuery && !boostQuery.equals("")) {
|
||||||
|
Query tmp = p.parse(boostQuery);
|
||||||
|
/* if the default boost was used, and we've got a BooleanQuery
|
||||||
|
* extract the subqueries out and use them directly
|
||||||
|
*/
|
||||||
|
if (1.0f == tmp.getBoost() && tmp instanceof BooleanQuery) {
|
||||||
|
for (BooleanClause c : ((BooleanQuery)tmp).getClauses()) {
|
||||||
|
query.add(c);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
query.add(tmp, BooleanClause.Occur.SHOULD);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* * * Boosting Functions * * */
|
||||||
|
|
||||||
|
String boostFunc = U.getParam(req, params.BF, params.bf);
|
||||||
|
if (null != boostFunc && !boostFunc.equals("")) {
|
||||||
|
List<Query> funcs = U.parseFuncs(schema, boostFunc);
|
||||||
|
for (Query f : funcs) {
|
||||||
|
query.add(f, Occur.SHOULD);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* * * Restrict Results * * */
|
||||||
|
|
||||||
|
List<Query> restrictions = new ArrayList<Query>(1);
|
||||||
|
|
||||||
|
/* User Restriction */
|
||||||
|
String filterQueryString = U.getParam(req, params.FQ, params.fq);
|
||||||
|
Query filterQuery = null;
|
||||||
|
if (null != filterQueryString && !filterQueryString.equals("")) {
|
||||||
|
filterQuery = p.parse(filterQueryString);
|
||||||
|
restrictions.add(filterQuery);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* * * Generate Main Results * * */
|
||||||
|
|
||||||
|
DocList results = s.getDocList(query, restrictions,
|
||||||
|
SolrPluginUtils.getSort(req),
|
||||||
|
req.getStart(), req.getLimit(),
|
||||||
|
SolrIndexSearcher.GET_SCORES);
|
||||||
|
rsp.add("search-results",results);
|
||||||
|
|
||||||
|
U.setReturnFields(U.getParam(req, params.FL, params.fl), rsp);
|
||||||
|
|
||||||
|
|
||||||
|
/* * * Debugging Info * * */
|
||||||
|
|
||||||
|
try {
|
||||||
|
NamedList debug = U.doStandardDebug(req, userQuery, query, results);
|
||||||
|
if (null != debug) {
|
||||||
|
debug.add("boostquery", boostQuery);
|
||||||
|
debug.add("boostfunc", boostFunc);
|
||||||
|
|
||||||
|
debug.add("filterquery", filterQueryString);
|
||||||
|
if (null != filterQuery) {
|
||||||
|
debug.add("parsedfilterquery",
|
||||||
|
QueryParsing.toString(filterQuery, schema));
|
||||||
|
}
|
||||||
|
|
||||||
|
rsp.add("debug", debug);
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
SolrException.logOnce(SolrCore.log,
|
||||||
|
"Exception durring debug", e);
|
||||||
|
rsp.add("exception_during_debug", SolrException.toStr(e));
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
SolrException.log(SolrCore.log,e);
|
||||||
|
rsp.setException(e);
|
||||||
|
numErrors++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,823 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2006 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.util;
|
||||||
|
|
||||||
|
import org.apache.solr.core.SolrCore;
|
||||||
|
import org.apache.solr.core.SolrInfoMBean;
|
||||||
|
import org.apache.solr.core.SolrException;
|
||||||
|
|
||||||
|
import org.apache.solr.search.SolrIndexSearcher;
|
||||||
|
import org.apache.solr.search.DocIterator;
|
||||||
|
import org.apache.solr.search.DocSet;
|
||||||
|
import org.apache.solr.search.DocList;
|
||||||
|
import org.apache.solr.search.DocListAndSet;
|
||||||
|
import org.apache.solr.search.SolrCache;
|
||||||
|
import org.apache.solr.search.SolrQueryParser;
|
||||||
|
import org.apache.solr.search.QueryParsing;
|
||||||
|
import org.apache.solr.search.CacheRegenerator;
|
||||||
|
|
||||||
|
import org.apache.solr.request.StandardRequestHandler;
|
||||||
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
|
import org.apache.solr.request.SolrQueryResponse;
|
||||||
|
import org.apache.solr.request.SolrRequestHandler;
|
||||||
|
|
||||||
|
import org.apache.solr.schema.IndexSchema;
|
||||||
|
import org.apache.solr.schema.FieldType;
|
||||||
|
|
||||||
|
import org.apache.solr.util.StrUtils;
|
||||||
|
import org.apache.solr.util.NamedList;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
|
import org.apache.lucene.search.ConstantScoreRangeQuery;
|
||||||
|
import org.apache.lucene.search.Sort;
|
||||||
|
import org.apache.lucene.search.Explanation;
|
||||||
|
import org.apache.lucene.queryParser.QueryParser;
|
||||||
|
import org.apache.lucene.queryParser.ParseException;
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
|
||||||
|
import org.xmlpull.v1.XmlPullParserException;
|
||||||
|
|
||||||
|
import java.util.logging.Logger;
|
||||||
|
import java.util.logging.Level;
|
||||||
|
import java.util.logging.Handler;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.URL;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>Utilities that may be of use to RequestHandlers.</p>
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* Many of these functions have code that was stolen/mutated from
|
||||||
|
* StandardRequestHandler.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <p>:TODO: refactor StandardRequestHandler to use these utilities</p>
|
||||||
|
*/
|
||||||
|
public class SolrPluginUtils {
|
||||||
|
|
||||||
|
/** standard param for field list */
|
||||||
|
public static String FL = CommonParams.FL;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* SolrIndexSearch.numDocs(Query,Query) freaks out if the filtering
|
||||||
|
* query is null, so we use this workarround.
|
||||||
|
*/
|
||||||
|
public static int numDocs(SolrIndexSearcher s, Query q, Query f)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
return (null == f) ? s.getDocSet(q).size() : s.numDocs(q,f);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the param, or the default if it's empty or not specified.
|
||||||
|
*/
|
||||||
|
public static String getParam(SolrQueryRequest req,
|
||||||
|
String param, String def) {
|
||||||
|
|
||||||
|
String v = req.getParam(param);
|
||||||
|
if (null == v || "".equals(v.trim())) {
|
||||||
|
return def;
|
||||||
|
}
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Treats the param value as a Number, returns the default if nothing is
|
||||||
|
* there or if it's not a number.
|
||||||
|
*/
|
||||||
|
public static Number getNumberParam(SolrQueryRequest req,
|
||||||
|
String param, Number def) {
|
||||||
|
|
||||||
|
Number r = def;
|
||||||
|
String v = req.getParam(param);
|
||||||
|
if (null == v || "".equals(v.trim())) {
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
r = new Float(v);
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
/* :NOOP" */
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
private final static Pattern splitList=Pattern.compile(",| ");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Assumes the standard query param of "fl" to specify the return fields
|
||||||
|
* @see #setReturnFields(String,SolrQueryResponse)
|
||||||
|
*/
|
||||||
|
public static void setReturnFields(SolrQueryRequest req,
|
||||||
|
SolrQueryResponse res) {
|
||||||
|
|
||||||
|
setReturnFields(req.getParam(FL), res);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a space seperated list of field names, sets the field list on the
|
||||||
|
* SolrQueryResponse.
|
||||||
|
*/
|
||||||
|
public static void setReturnFields(String fl,
|
||||||
|
SolrQueryResponse res) {
|
||||||
|
|
||||||
|
if (fl != null) {
|
||||||
|
// TODO - this could become more efficient if widely used.
|
||||||
|
// TODO - should field order be maintained?
|
||||||
|
String[] flst = splitList.split(fl.trim(),0);
|
||||||
|
if (flst.length > 0 && !(flst.length==1 && flst[0].length()==0)) {
|
||||||
|
Set<String> set = new HashSet<String>();
|
||||||
|
for (String fname : flst) set.add(fname);
|
||||||
|
res.setReturnFields(set);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>
|
||||||
|
* Returns a NamedList containing many "standard" pieces of debugging
|
||||||
|
* information.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <ul>
|
||||||
|
* <li>rawquerystring - the 'q' param exactly as specified by the client
|
||||||
|
* </li>
|
||||||
|
* <li>querystring - the 'q' param after any preprocessing done by the plugin
|
||||||
|
* </li>
|
||||||
|
* <li>parsedquery - the main query executed formated by the Solr
|
||||||
|
* QueryParsing utils class (which knows about field types)
|
||||||
|
* </li>
|
||||||
|
* <li>parsedquery_toString - the main query executed formated by it's
|
||||||
|
* own toString method (in case it has internal state Solr
|
||||||
|
* doesn't know about)
|
||||||
|
* </li>
|
||||||
|
* <li>expain - the list of score explanations for each document in
|
||||||
|
* results against query.
|
||||||
|
* </li>
|
||||||
|
* <li>otherQuery - the query string specified in 'explainOther' query param.
|
||||||
|
* </li>
|
||||||
|
* <li>explainOther - the list of score explanations for each document in
|
||||||
|
* results against 'otherQuery'
|
||||||
|
* </li>
|
||||||
|
* </ul>
|
||||||
|
*
|
||||||
|
* @param req the request we are dealing with
|
||||||
|
* @param userQuery the users query as a string, after any basic
|
||||||
|
* preprocessing has been done
|
||||||
|
* @param query the query built from the userQuery
|
||||||
|
* (and perhaps other clauses) that identifies the main
|
||||||
|
* result set of the response.
|
||||||
|
* @param results the main result set of hte response
|
||||||
|
*/
|
||||||
|
public static NamedList doStandardDebug(SolrQueryRequest req,
|
||||||
|
String userQuery,
|
||||||
|
Query query,
|
||||||
|
DocList results)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
|
||||||
|
String debug = req.getParam("debugQuery");
|
||||||
|
|
||||||
|
NamedList dbg = null;
|
||||||
|
if (debug!=null) {
|
||||||
|
dbg = new NamedList();
|
||||||
|
|
||||||
|
/* userQuery may have been pre-processes .. expose that */
|
||||||
|
dbg.add("rawquerystring",req.getQueryString());
|
||||||
|
dbg.add("querystring",userQuery);
|
||||||
|
|
||||||
|
/* QueryParsing.toString isn't perfect, use it to see converted
|
||||||
|
* values, use regular toString to see any attributes of the
|
||||||
|
* underlying Query it may have missed.
|
||||||
|
*/
|
||||||
|
dbg.add("parsedquery",QueryParsing.toString(query, req.getSchema()));
|
||||||
|
dbg.add("parsedquery_toString", query.toString());
|
||||||
|
|
||||||
|
dbg.add("explain", getExplainList
|
||||||
|
(query, results, req.getSearcher(), req.getSchema()));
|
||||||
|
String otherQueryS = req.getParam("explainOther");
|
||||||
|
if (otherQueryS != null && otherQueryS.length() > 0) {
|
||||||
|
DocList otherResults = doSimpleQuery
|
||||||
|
(otherQueryS,req.getSearcher(), req.getSchema(),0,10);
|
||||||
|
dbg.add("otherQuery",otherQueryS);
|
||||||
|
dbg.add("explainOther", getExplainList
|
||||||
|
(query, otherResults,
|
||||||
|
req.getSearcher(),
|
||||||
|
req.getSchema()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return dbg;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generates an list of Explanations for each item in a list of docs.
|
||||||
|
*
|
||||||
|
* @param query The Query you want explanations in the context of
|
||||||
|
* @param docs The Documents you want explained relative that query
|
||||||
|
*/
|
||||||
|
public static NamedList getExplainList(Query query, DocList docs,
|
||||||
|
SolrIndexSearcher searcher,
|
||||||
|
IndexSchema schema)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
NamedList explainList = new NamedList();
|
||||||
|
DocIterator iterator = docs.iterator();
|
||||||
|
for (int i=0; i<docs.size(); i++) {
|
||||||
|
int id = iterator.nextDoc();
|
||||||
|
|
||||||
|
Explanation explain = searcher.explain(query, id);
|
||||||
|
|
||||||
|
Document doc = searcher.doc(id);
|
||||||
|
String strid = schema.printableUniqueKey(doc);
|
||||||
|
String docname = "";
|
||||||
|
if (strid != null) docname="id="+strid+",";
|
||||||
|
docname = docname + "internal_docid="+id;
|
||||||
|
|
||||||
|
explainList.add(docname, "\n" +explain.toString());
|
||||||
|
}
|
||||||
|
return explainList;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Executes a basic query in lucene syntax
|
||||||
|
*/
|
||||||
|
public static DocList doSimpleQuery(String sreq,
|
||||||
|
SolrIndexSearcher searcher,
|
||||||
|
IndexSchema schema,
|
||||||
|
int start, int limit) throws IOException {
|
||||||
|
List<String> commands = StrUtils.splitSmart(sreq,';');
|
||||||
|
|
||||||
|
String qs = commands.size() >= 1 ? commands.get(0) : "";
|
||||||
|
Query query = QueryParsing.parseQuery(qs, schema);
|
||||||
|
|
||||||
|
// If the first non-query, non-filter command is a simple sort on an indexed field, then
|
||||||
|
// we can use the Lucene sort ability.
|
||||||
|
Sort sort = null;
|
||||||
|
if (commands.size() >= 2) {
|
||||||
|
QueryParsing.SortSpec sortSpec = QueryParsing.parseSort(commands.get(1), schema);
|
||||||
|
if (sortSpec != null) {
|
||||||
|
sort = sortSpec.getSort();
|
||||||
|
if (sortSpec.getCount() >= 0) {
|
||||||
|
limit = sortSpec.getCount();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
DocList results = searcher.getDocList(query,(DocSet)null, sort, start, limit);
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a string containing fieldNames and boost info,
|
||||||
|
* converts it to a Map from field name to boost info.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* Doesn't care if boost info is negative, you're on your own.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* Doesn't care if boost info is missing, again: you're on your own.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* @param in a String like "fieldOne^2.3 fieldTwo fieldThree^-0.4"
|
||||||
|
* @return Map of fieldOne => 2.3, fieldTwo => null, fieldThree => -0.4
|
||||||
|
*/
|
||||||
|
public static Map<String,Float> parseFieldBoosts(String in) {
|
||||||
|
|
||||||
|
if (null == in || "".equals(in.trim())) {
|
||||||
|
return new HashMap<String,Float>();
|
||||||
|
}
|
||||||
|
|
||||||
|
String[] bb = in.trim().split("\\s+");
|
||||||
|
Map<String, Float> out = new HashMap<String,Float>(7);
|
||||||
|
for (String s : bb) {
|
||||||
|
String[] bbb = s.split("\\^");
|
||||||
|
out.put(bbb[0], 1 == bbb.length ? null : Float.valueOf(bbb[1]));
|
||||||
|
}
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a string containing functions with optional boosts, returns
|
||||||
|
* an array of Queries representing those functions with the specified
|
||||||
|
* boosts.
|
||||||
|
* <p>
|
||||||
|
* NOTE: intra-function whitespace is not allowed.
|
||||||
|
* </p>
|
||||||
|
* @see #parseFieldBoosts
|
||||||
|
*/
|
||||||
|
public static List<Query> parseFuncs(IndexSchema s, String in)
|
||||||
|
throws ParseException {
|
||||||
|
|
||||||
|
Map<String,Float> ff = parseFieldBoosts(in);
|
||||||
|
List<Query> funcs = new ArrayList<Query>(ff.keySet().size());
|
||||||
|
for (String f : ff.keySet()) {
|
||||||
|
Query fq = QueryParsing.parseFunction(f, s);
|
||||||
|
Float b = ff.get(f);
|
||||||
|
if (null != b) {
|
||||||
|
fq.setBoost(b);
|
||||||
|
}
|
||||||
|
funcs.add(fq);
|
||||||
|
}
|
||||||
|
return funcs;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks the number of optional clauses in the query, and compares it
|
||||||
|
* with the specification string to determine the proper value to use.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* Details about the specification format can be found
|
||||||
|
* <a href="doc-files/min-should-match.html">here</a>
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <p>A few important notes...</p>
|
||||||
|
* <ul>
|
||||||
|
* <li>
|
||||||
|
* If the calculations based on the specification determine that no
|
||||||
|
* optional clauses are needed, BooleanQuerysetMinMumberShouldMatch
|
||||||
|
* will never be called, but the usual rules about BooleanQueries
|
||||||
|
* still apply at search time (a BooleanQuery containing no required
|
||||||
|
* clauses must still match at least one optional clause)
|
||||||
|
* <li>
|
||||||
|
* <li>
|
||||||
|
* No matter what number the calculation arrives at,
|
||||||
|
* BooleanQuery.setMinShouldMatch() will never be called with a
|
||||||
|
* value greater then the number of optional clauses (or less then 1)
|
||||||
|
* </li>
|
||||||
|
* </ul>
|
||||||
|
*
|
||||||
|
* <p>:TODO: should optimize the case where number is same
|
||||||
|
* as clauses to just make them all "required"
|
||||||
|
* </p>
|
||||||
|
*/
|
||||||
|
public static void setMinShouldMatch(BooleanQuery q, String spec) {
|
||||||
|
|
||||||
|
int optionalClauses = 0;
|
||||||
|
for (BooleanClause c : q.getClauses()) {
|
||||||
|
if (c.getOccur() == Occur.SHOULD) {
|
||||||
|
optionalClauses++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int msm = calculateMinShouldMatch(optionalClauses, spec);
|
||||||
|
if (0 < msm) {
|
||||||
|
q.setMinimumNumberShouldMatch(msm);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* helper exposed for UnitTests
|
||||||
|
* @see #setMinShouldMatch
|
||||||
|
*/
|
||||||
|
static int calculateMinShouldMatch(int optionalClauseCount, String spec) {
|
||||||
|
|
||||||
|
int result = optionalClauseCount;
|
||||||
|
|
||||||
|
|
||||||
|
if (-1 < spec.indexOf("<")) {
|
||||||
|
/* we have conditional spec(s) */
|
||||||
|
|
||||||
|
for (String s : spec.trim().split(" ")) {
|
||||||
|
String[] parts = s.split("<");
|
||||||
|
int upperBound = (new Integer(parts[0])).intValue();
|
||||||
|
if (optionalClauseCount <= upperBound) {
|
||||||
|
return result;
|
||||||
|
} else {
|
||||||
|
result = calculateMinShouldMatch
|
||||||
|
(optionalClauseCount, parts[1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* otherwise, simple expresion */
|
||||||
|
|
||||||
|
if (-1 < spec.indexOf("%")) {
|
||||||
|
/* percentage */
|
||||||
|
int percent = new Integer(spec.replace("%","")).intValue();
|
||||||
|
float calc = (result * percent) / 100f;
|
||||||
|
result = calc < 0 ? result + (int)calc : (int)calc;
|
||||||
|
} else {
|
||||||
|
int calc = (new Integer(spec)).intValue();
|
||||||
|
result = calc < 0 ? result + calc : calc;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (optionalClauseCount < result ?
|
||||||
|
optionalClauseCount : (result < 0 ? 0 : result));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Recursively walks the "from" query pulling out sub-queries and
|
||||||
|
* adding them to the "to" query.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* Boosts are multiplied as needed. Sub-BooleanQueryies which are not
|
||||||
|
* optional will not be flattened. From will be mangled durring the walk,
|
||||||
|
* so do not attempt to reuse it.
|
||||||
|
* </p>
|
||||||
|
*/
|
||||||
|
public static void flatenBooleanQuery(BooleanQuery to, BooleanQuery from) {
|
||||||
|
|
||||||
|
BooleanClause[] c = from.getClauses();
|
||||||
|
for (int i = 0; i < c.length; i++) {
|
||||||
|
|
||||||
|
Query ci = c[i].getQuery();
|
||||||
|
ci.setBoost(ci.getBoost() * from.getBoost());
|
||||||
|
|
||||||
|
if (ci instanceof BooleanQuery
|
||||||
|
&& !c[i].isRequired()
|
||||||
|
&& !c[i].isProhibited()) {
|
||||||
|
|
||||||
|
/* we can recurse */
|
||||||
|
flatenBooleanQuery(to, (BooleanQuery)ci);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
to.add(c[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Escapes all special characters except '"', '-', and '+'
|
||||||
|
*
|
||||||
|
* @see QueryParser#escape
|
||||||
|
*/
|
||||||
|
public static CharSequence partialEscape(CharSequence s) {
|
||||||
|
StringBuffer sb = new StringBuffer();
|
||||||
|
for (int i = 0; i < s.length(); i++) {
|
||||||
|
char c = s.charAt(i);
|
||||||
|
if (c == '\\' || c == '!' || c == '(' || c == ')' ||
|
||||||
|
c == ':' || c == '^' || c == '[' || c == ']' ||
|
||||||
|
c == '{' || c == '}' || c == '~' || c == '*' || c == '?'
|
||||||
|
) {
|
||||||
|
sb.append('\\');
|
||||||
|
}
|
||||||
|
sb.append(c);
|
||||||
|
}
|
||||||
|
return sb;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns it's input if there is an even (ie: balanced) number of
|
||||||
|
* '"' characters -- otherwise returns a String in which all '"'
|
||||||
|
* characters are striped out.
|
||||||
|
*/
|
||||||
|
public static CharSequence stripUnbalancedQuotes(CharSequence s) {
|
||||||
|
int count = 0;
|
||||||
|
for (int i = 0; i < s.length(); i++) {
|
||||||
|
if (s.charAt(i) == '\"') { count++; }
|
||||||
|
}
|
||||||
|
if (0 == (count & 1)) {
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
return s.toString().replace("\"","");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A collection on common params, both for Plugin initialization and
|
||||||
|
* for Requests.
|
||||||
|
*/
|
||||||
|
public static class CommonParams {
|
||||||
|
|
||||||
|
/** query and init param for tiebreaker value */
|
||||||
|
public static String TIE = "tie";
|
||||||
|
/** query and init param for query fields */
|
||||||
|
public static String QF = "qf";
|
||||||
|
/** query and init param for phrase boost fields */
|
||||||
|
public static String PF = "pf";
|
||||||
|
/** query and init param for MinShouldMatch specification */
|
||||||
|
public static String MM = "mm";
|
||||||
|
/** query and init param for Phrase Slop value */
|
||||||
|
public static String PS = "ps";
|
||||||
|
/** query and init param for boosting query */
|
||||||
|
public static String BQ = "bq";
|
||||||
|
/** query and init param for boosting functions */
|
||||||
|
public static String BF = "bf";
|
||||||
|
/** query and init param for filtering query */
|
||||||
|
public static String FQ = "fq";
|
||||||
|
/** query and init param for field list */
|
||||||
|
public static String FL = "fl";
|
||||||
|
/** query and init param for field list */
|
||||||
|
public static String GEN = "gen";
|
||||||
|
|
||||||
|
/** the default tie breaker to use in DisjunctionMaxQueries */
|
||||||
|
public float tiebreaker = 0.0f;
|
||||||
|
/** the default query fields to be used */
|
||||||
|
public String qf = null;
|
||||||
|
/** the default phrase boosting fields to be used */
|
||||||
|
public String pf = null;
|
||||||
|
/** the default min should match to be used */
|
||||||
|
public String mm = "100%";
|
||||||
|
/** the default phrase slop to be used */
|
||||||
|
public int pslop = 0;
|
||||||
|
/** the default boosting query to be used */
|
||||||
|
public String bq = null;
|
||||||
|
/** the default boosting functions to be used */
|
||||||
|
public String bf = null;
|
||||||
|
/** the default filtering query to be used */
|
||||||
|
public String fq = null;
|
||||||
|
/** the default field list to be used */
|
||||||
|
public String fl = null;
|
||||||
|
|
||||||
|
public CommonParams() {
|
||||||
|
/* :NOOP: */
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @see #setValues */
|
||||||
|
public CommonParams(NamedList args) {
|
||||||
|
this();
|
||||||
|
setValues(args);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the params using values from a NamedList, usefull in the
|
||||||
|
* init method for your handler.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* If any param is not of the expected type, a severe error is
|
||||||
|
* logged,and the param is skipped.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* If any param is not of in the NamedList, it is skipped and the
|
||||||
|
* old value is left alone.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public void setValues(NamedList args) {
|
||||||
|
|
||||||
|
Object tmp;
|
||||||
|
|
||||||
|
tmp = args.get(TIE);
|
||||||
|
if (null != tmp) {
|
||||||
|
if (tmp instanceof Float) {
|
||||||
|
tiebreaker = ((Float)tmp).floatValue();
|
||||||
|
} else {
|
||||||
|
SolrCore.log.severe("init param is not a float: " + TIE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp = args.get(QF);
|
||||||
|
if (null != tmp) {
|
||||||
|
if (tmp instanceof String) {
|
||||||
|
qf = tmp.toString();
|
||||||
|
} else {
|
||||||
|
SolrCore.log.severe("init param is not a str: " + QF);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp = args.get(PF);
|
||||||
|
if (null != tmp) {
|
||||||
|
if (tmp instanceof String) {
|
||||||
|
pf = tmp.toString();
|
||||||
|
} else {
|
||||||
|
SolrCore.log.severe("init param is not a str: " + PF);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
tmp = args.get(MM);
|
||||||
|
if (null != tmp) {
|
||||||
|
if (tmp instanceof String) {
|
||||||
|
mm = tmp.toString();
|
||||||
|
} else {
|
||||||
|
SolrCore.log.severe("init param is not a str: " + MM);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp = args.get(PS);
|
||||||
|
if (null != tmp) {
|
||||||
|
if (tmp instanceof Integer) {
|
||||||
|
pslop = ((Integer)tmp).intValue();
|
||||||
|
} else {
|
||||||
|
SolrCore.log.severe("init param is not an int: " + PS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp = args.get(BQ);
|
||||||
|
if (null != tmp) {
|
||||||
|
if (tmp instanceof String) {
|
||||||
|
bq = tmp.toString();
|
||||||
|
} else {
|
||||||
|
SolrCore.log.severe("init param is not a str: " + BQ);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp = args.get(BF);
|
||||||
|
if (null != tmp) {
|
||||||
|
if (tmp instanceof String) {
|
||||||
|
bf = tmp.toString();
|
||||||
|
} else {
|
||||||
|
SolrCore.log.severe("init param is not a str: " + BF);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp = args.get(FQ);
|
||||||
|
if (null != tmp) {
|
||||||
|
if (tmp instanceof String) {
|
||||||
|
fq = tmp.toString();
|
||||||
|
} else {
|
||||||
|
SolrCore.log.severe("init param is not a str: " + FQ);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp = args.get(FL);
|
||||||
|
if (null != tmp) {
|
||||||
|
if (tmp instanceof String) {
|
||||||
|
fl = tmp.toString();
|
||||||
|
} else {
|
||||||
|
SolrCore.log.severe("init param is not a str: " + FL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A subclass of SolrQueryParser that supports aliasing fields for
|
||||||
|
* constructing DisjunctionMaxQueries.
|
||||||
|
*/
|
||||||
|
public static class DisjunctionMaxQueryParser extends SolrQueryParser {
|
||||||
|
|
||||||
|
/** A simple container for storing alias info
|
||||||
|
* @see #aliases
|
||||||
|
*/
|
||||||
|
protected static class Alias {
|
||||||
|
public float tie;
|
||||||
|
public Map<String,Float> fields;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Where we store a map from field name we expect to see in our query
|
||||||
|
* string, to Alias object containing the fields to use in our
|
||||||
|
* DisjunctionMaxQuery and the tiebreaker to use.
|
||||||
|
*/
|
||||||
|
protected Map<String,Alias> aliases = new HashMap<String,Alias>(3);
|
||||||
|
|
||||||
|
public DisjunctionMaxQueryParser(IndexSchema s, String defaultField) {
|
||||||
|
super(s,defaultField);
|
||||||
|
}
|
||||||
|
public DisjunctionMaxQueryParser(IndexSchema s) {
|
||||||
|
this(s,null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add an alias to this query parser.
|
||||||
|
*
|
||||||
|
* @param field the field name that should trigger alias mapping
|
||||||
|
* @param fieldBoosts the mapping from fieldname to boost value that
|
||||||
|
* should be used to build up the clauses of the
|
||||||
|
* DisjunctionMaxQuery.
|
||||||
|
* @param tiebreaker to the tiebreaker to be used in the
|
||||||
|
* DisjunctionMaxQuery
|
||||||
|
* @see SolrPluginUtils#parseFieldBoosts
|
||||||
|
*/
|
||||||
|
public void addAlias(String field, float tiebreaker,
|
||||||
|
Map<String,Float> fieldBoosts) {
|
||||||
|
|
||||||
|
Alias a = new Alias();
|
||||||
|
a.tie = tiebreaker;
|
||||||
|
a.fields = fieldBoosts;
|
||||||
|
aliases.put(field, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delegates to the super class unless the field has been specified
|
||||||
|
* as an alias -- in which case we recurse on each of
|
||||||
|
* the aliased fields, and the results are composed into a
|
||||||
|
* DisjunctionMaxQuery. (so yes: aliases which point at other
|
||||||
|
* aliases should work)
|
||||||
|
*/
|
||||||
|
protected Query getFieldQuery(String field, String queryText)
|
||||||
|
throws ParseException {
|
||||||
|
|
||||||
|
if (aliases.containsKey(field)) {
|
||||||
|
|
||||||
|
Alias a = aliases.get(field);
|
||||||
|
DisjunctionMaxQuery q = new DisjunctionMaxQuery(a.tie);
|
||||||
|
|
||||||
|
/* we might not get any valid queries from delegation,
|
||||||
|
* in which we should return null
|
||||||
|
*/
|
||||||
|
boolean ok = false;
|
||||||
|
|
||||||
|
for (String f : a.fields.keySet()) {
|
||||||
|
|
||||||
|
Query sub = getFieldQuery(f,queryText);
|
||||||
|
if (null != sub) {
|
||||||
|
if (null != a.fields.get(f)) {
|
||||||
|
sub.setBoost(a.fields.get(f));
|
||||||
|
}
|
||||||
|
q.add(sub);
|
||||||
|
ok = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ok ? q : null;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
return super.getFieldQuery(field, queryText);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines the correct Sort based on the request parameter "sort"
|
||||||
|
*
|
||||||
|
* @return null if no sort is specified.
|
||||||
|
*/
|
||||||
|
public static Sort getSort(SolrQueryRequest req) {
|
||||||
|
|
||||||
|
String sort = req.getParam("sort");
|
||||||
|
if (null == sort || sort.equals("")) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
SolrException sortE = null;
|
||||||
|
QueryParsing.SortSpec ss = null;
|
||||||
|
try {
|
||||||
|
ss = QueryParsing.parseSort(sort, req.getSchema());
|
||||||
|
} catch (SolrException e) {
|
||||||
|
sortE = e;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((null == ss) || (null != sortE)) {
|
||||||
|
/* we definitely had some sort of sort string from the user,
|
||||||
|
* but no SortSpec came out of it
|
||||||
|
*/
|
||||||
|
SolrCore.log.log(Level.WARNING,"Invalid sort \""+sort+"\" was specified, ignoring", sortE);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ss.getSort();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A CacheRegenerator that can be used whenever the items in the cache
|
||||||
|
* are not dependant on the current searcher.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* Flat out copies the oldKey=>oldVal pair into the newCache
|
||||||
|
* </p>
|
||||||
|
*/
|
||||||
|
public static class IdentityRegenerator implements CacheRegenerator {
|
||||||
|
public boolean regenerateItem(SolrIndexSearcher newSearcher,
|
||||||
|
SolrCache newCache,
|
||||||
|
SolrCache oldCache,
|
||||||
|
Object oldKey,
|
||||||
|
Object oldVal)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
newCache.put(oldKey,oldVal);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,110 @@
|
||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>Min Number Should Match Specification Format</title>
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
<h1>Min Number Should Match Specification Format</h1>
|
||||||
|
|
||||||
|
<blockquote>
|
||||||
|
This document explains the format used for specifying the
|
||||||
|
"Min Number Should Match" criteria of the BooleanQuery objects built by the
|
||||||
|
DisMaxRequestHandler.
|
||||||
|
</blockquote>
|
||||||
|
|
||||||
|
|
||||||
|
<h2>Explanation of Concept: "Min Number Should Match"</h2>
|
||||||
|
<div>
|
||||||
|
:TODO:
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
<h2>Specification Format</h2>
|
||||||
|
<div>
|
||||||
|
|
||||||
|
<p>Specification strings may have the following formats...</p>
|
||||||
|
|
||||||
|
<dl>
|
||||||
|
|
||||||
|
<dt><code>3</code></dt>
|
||||||
|
<dd>A positive integer, indicating a fixed value regardless of the
|
||||||
|
number of optional clauses.
|
||||||
|
</dd>
|
||||||
|
|
||||||
|
<dt><code>-2</code></dt>
|
||||||
|
<dd>A negative integer, indicating that the total number of optional clauses,
|
||||||
|
minus this number should be mandatory.
|
||||||
|
</dd>
|
||||||
|
|
||||||
|
<dt><code>75%</code></dt>
|
||||||
|
<dd>A percentage, indicating that this percent of the total number of
|
||||||
|
optional clauses are necessary. The number computed from the
|
||||||
|
percentage is rounded down and used as the minimum.
|
||||||
|
</dd>
|
||||||
|
|
||||||
|
<dt><code>-25%</code></dt>
|
||||||
|
<dd>A negative percentage, indicating that this percent of the total
|
||||||
|
number of optional clauses can be missing. The number computed from the
|
||||||
|
percentage is rounded down, before being subtracted from the total
|
||||||
|
to determine the minimum.
|
||||||
|
</dd>
|
||||||
|
|
||||||
|
<dt><code>3&lt;90%</code></dt>
|
||||||
|
<dd>A positive integer, followed by the less-than symbol, followed
|
||||||
|
by any of the previously mentioned specifiers is a conditional
|
||||||
|
specification. It indicates that if the number of optional clauses is
|
||||||
|
equal to (or less than) the integer, they are all required, but
|
||||||
|
if it's greater than the integer, the specification applies.
|
||||||
|
In this example: if there are 1 to 3 clauses they are all required,
|
||||||
|
but for 4 or more clauses only 90% are required.
|
||||||
|
</dd>
|
||||||
|
|
||||||
|
<dt><code>2&lt;-25% 9&lt;-3</code></dt>
|
||||||
|
<dd>Multiple conditional specifications can be separated by spaces,
|
||||||
|
each one only being valid for numbers greater than the one before it.
|
||||||
|
In this example: if there are 1 or 2 clauses both are required,
|
||||||
|
if there are 3-9 clauses all but 25% are required, and if there
|
||||||
|
are more than 9 clauses, all but three are required.
|
||||||
|
</dd>
|
||||||
|
</dl>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
A few important notes...
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>
|
||||||
|
When dealing with percentages, negative values can be used to get
|
||||||
|
different behavior in edge cases. 75% and -25% mean the same thing
|
||||||
|
when dealing with 4 clauses, but when dealing with 5 clauses 75% means
|
||||||
|
3 are required, but -25% means 4 are required.
|
||||||
|
</li>
|
||||||
|
<li>
|
||||||
|
If the calculations based on the specification determine that no
|
||||||
|
optional clauses are needed, the usual rules about BooleanQueries
|
||||||
|
still apply at search time (a BooleanQuery containing no required
|
||||||
|
clauses must still match at least one optional clause)
|
||||||
|
</li>
|
||||||
|
<li>
|
||||||
|
No matter what number the calculation arrives at,
|
||||||
|
a value greater than the number of optional clauses, or a value less than
|
||||||
|
1 will never be used. (ie: no matter how low or how high the result of the
|
||||||
|
calculation result is, the minimum number of required matches will never
|
||||||
|
be lower than 1 or greater than the number of clauses.)
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<hr>
|
||||||
|
<pre>
|
||||||
|
$Id:$
|
||||||
|
$Source:$
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
</body> </html>
|
|
@ -0,0 +1,103 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2006 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr;
|
||||||
|
|
||||||
|
import org.apache.solr.request.*;
|
||||||
|
import org.apache.solr.util.*;
|
||||||
|
import org.w3c.dom.Document;
|
||||||
|
|
||||||
|
import javax.xml.parsers.DocumentBuilderFactory;
|
||||||
|
import javax.xml.parsers.DocumentBuilder;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.StringWriter;
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.UnsupportedEncodingException;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.HashMap;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests some basic functionality of the DisMaxRequestHandler
|
||||||
|
*/
|
||||||
|
public class DisMaxRequestHandlerTest extends AbstractSolrTestCase {
|
||||||
|
|
||||||
|
public String getSchemaFile() { return "schema.xml"; }
|
||||||
|
public String getSolrConfigFile() { return "solrconfig.xml"; }
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
super.setUp();
|
||||||
|
lrf = h.getRequestFactory
|
||||||
|
("dismax",0,20,"version","2.0");
|
||||||
|
}
|
||||||
|
public void testSomeStuff() throws Exception {
|
||||||
|
|
||||||
|
assertU(adoc("id", "666",
|
||||||
|
"features_t", "cool and scary stuff",
|
||||||
|
"subject", "traveling in hell",
|
||||||
|
"title", "The Omen",
|
||||||
|
"weight", "87.9",
|
||||||
|
"iind", "666"));
|
||||||
|
assertU(adoc("id", "42",
|
||||||
|
"features_t", "cool stuff",
|
||||||
|
"subject", "traveling the galaxy",
|
||||||
|
"title", "Hitch Hiker's Guide to the Galaxy",
|
||||||
|
"weight", "99.45",
|
||||||
|
"iind", "42"));
|
||||||
|
assertU(adoc("id", "1",
|
||||||
|
"features_t", "nothing",
|
||||||
|
"subject", "garbage",
|
||||||
|
"title", "Most Boring Guide Ever",
|
||||||
|
"weight", "77",
|
||||||
|
"iind", "4"));
|
||||||
|
assertU(adoc("id", "8675309",
|
||||||
|
"features_t", "Wikedly memorable chorus and stuff",
|
||||||
|
"subject", "One Cool Hot Chick",
|
||||||
|
"title", "Jenny",
|
||||||
|
"weight", "97.3",
|
||||||
|
"iind", "8675309"));
|
||||||
|
assertU(commit());
|
||||||
|
|
||||||
|
assertQ("basic match",
|
||||||
|
req("guide")
|
||||||
|
,"//*[@numFound='2']"
|
||||||
|
);
|
||||||
|
|
||||||
|
assertQ("basic cross field matching, boost on same field matching",
|
||||||
|
req("cool stuff")
|
||||||
|
,"//*[@numFound='3']"
|
||||||
|
,"//result/doc[1]/int[@name='id'][.='42']"
|
||||||
|
,"//result/doc[2]/int[@name='id'][.='666']"
|
||||||
|
,"//result/doc[3]/int[@name='id'][.='8675309']"
|
||||||
|
);
|
||||||
|
|
||||||
|
assertQ("minimum mm is three",
|
||||||
|
req("cool stuff traveling")
|
||||||
|
,"//*[@numFound='2']"
|
||||||
|
,"//result/doc[1]/int[@name='id'][. ='42']"
|
||||||
|
,"//result/doc[2]/int[@name='id'][. ='666']"
|
||||||
|
);
|
||||||
|
|
||||||
|
assertQ("at 4 mm allows one missing ",
|
||||||
|
req("cool stuff traveling jenny")
|
||||||
|
,"//*[@numFound='3']"
|
||||||
|
);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,329 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2006 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.util;
|
||||||
|
|
||||||
|
import org.apache.solr.search.SolrQueryParser;
|
||||||
|
import org.apache.solr.util.NamedList;
|
||||||
|
import org.apache.solr.util.SolrPluginUtils;
|
||||||
|
import org.apache.solr.util.SolrPluginUtils.DisjunctionMaxQueryParser;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.apache.lucene.search.PhraseQuery;
|
||||||
|
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
|
|
||||||
|
import org.xmlpull.v1.XmlPullParserFactory;
|
||||||
|
|
||||||
|
import junit.framework.Test;
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
import junit.framework.TestSuite;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests that the functions in SolrPluginUtils work as advertised.
|
||||||
|
*/
|
||||||
|
public class SolrPluginUtilsTest extends AbstractSolrTestCase {
|
||||||
|
|
||||||
|
/** @return the name of the schema file this test runs against */
public String getSchemaFile() {
  return "schema.xml";
}
|
||||||
|
/** @return the name of the solr config file this test runs against */
public String getSolrConfigFile() {
  return "solrconfig.xml";
}
|
||||||
|
|
||||||
|
public void testPartialEscape() {
|
||||||
|
|
||||||
|
assertEquals("",pe(""));
|
||||||
|
assertEquals("foo",pe("foo"));
|
||||||
|
assertEquals("foo\\:bar",pe("foo:bar"));
|
||||||
|
assertEquals("+foo\\:bar",pe("+foo:bar"));
|
||||||
|
assertEquals("foo \\! bar",pe("foo ! bar"));
|
||||||
|
assertEquals("foo\\?",pe("foo?"));
|
||||||
|
assertEquals("foo \"bar\"",pe("foo \"bar\""));
|
||||||
|
assertEquals("foo\\! \"bar\"",pe("foo! \"bar\""));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testStripUnbalancedQuotes() {
|
||||||
|
|
||||||
|
assertEquals("",strip(""));
|
||||||
|
assertEquals("foo",strip("foo"));
|
||||||
|
assertEquals("foo \"bar\"",strip("foo \"bar\""));
|
||||||
|
assertEquals("42",strip("42\""));
|
||||||
|
assertEquals("\"how now brown cow?\"",strip("\"how now brown cow?\""));
|
||||||
|
assertEquals("\"you go\" \"now!\"",strip("\"you go\" \"now!\""));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testParseFieldBoosts() throws Exception {
|
||||||
|
|
||||||
|
Map<String,Float> e1 = new HashMap<String,Float>();
|
||||||
|
e1.put("fieldOne",2.3f);
|
||||||
|
e1.put("fieldTwo",null);
|
||||||
|
e1.put("fieldThree",-0.4f);
|
||||||
|
|
||||||
|
assertEquals("basic e1", e1, SolrPluginUtils.parseFieldBoosts
|
||||||
|
("fieldOne^2.3 fieldTwo fieldThree^-0.4"));
|
||||||
|
assertEquals("spacey e1", e1, SolrPluginUtils.parseFieldBoosts
|
||||||
|
(" fieldOne^2.3 fieldTwo fieldThree^-0.4 "));
|
||||||
|
assertEquals("really spacey e1", e1, SolrPluginUtils.parseFieldBoosts
|
||||||
|
(" \t fieldOne^2.3 \n fieldTwo fieldThree^-0.4 "));
|
||||||
|
|
||||||
|
Map<String,Float> e2 = new HashMap<String,Float>();
|
||||||
|
assertEquals("empty e2", e2, SolrPluginUtils.parseFieldBoosts
|
||||||
|
(""));
|
||||||
|
assertEquals("spacey e2", e2, SolrPluginUtils.parseFieldBoosts
|
||||||
|
(" \t "));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void testDisjunctionMaxQueryParser() throws Exception {
|
||||||
|
|
||||||
|
Query out;
|
||||||
|
String t;
|
||||||
|
|
||||||
|
DisjunctionMaxQueryParser qp =
|
||||||
|
new SolrPluginUtils.DisjunctionMaxQueryParser(h.getCore().getSchema());
|
||||||
|
|
||||||
|
qp.addAlias("hoss", 0.01f, SolrPluginUtils.parseFieldBoosts
|
||||||
|
("title^2.0 title_stemmed name^1.2 subject^0.5"));
|
||||||
|
qp.addAlias("test", 0.01f, SolrPluginUtils.parseFieldBoosts("text^2.0"));
|
||||||
|
qp.addAlias("unused", 1.0f, SolrPluginUtils.parseFieldBoosts
|
||||||
|
("subject^0.5 sind^1.5"));
|
||||||
|
|
||||||
|
|
||||||
|
/* first some sanity tests that don't use aliasing at all */
|
||||||
|
|
||||||
|
t = "XXXXXXXX";
|
||||||
|
out = qp.parse(t);
|
||||||
|
assertNotNull(t+" sanity test gave back null", out);
|
||||||
|
assertTrue(t+" sanity test isn't TermQuery: " + out.getClass(),
|
||||||
|
out instanceof TermQuery);
|
||||||
|
assertEquals(t+" sanity test is wrong field",
|
||||||
|
h.getCore().getSchema().getDefaultSearchFieldName(),
|
||||||
|
((TermQuery)out).getTerm().field());
|
||||||
|
|
||||||
|
t = "subject:XXXXXXXX";
|
||||||
|
out = qp.parse(t);
|
||||||
|
assertNotNull(t+" sanity test gave back null", out);
|
||||||
|
assertTrue(t+" sanity test isn't TermQuery: " + out.getClass(),
|
||||||
|
out instanceof TermQuery);
|
||||||
|
assertEquals(t+" sanity test is wrong field", "subject",
|
||||||
|
((TermQuery)out).getTerm().field());
|
||||||
|
|
||||||
|
/* field has untokenzied type, so this should be a term anyway */
|
||||||
|
t = "sind:\"simple phrase\"";
|
||||||
|
out = qp.parse(t);
|
||||||
|
assertNotNull(t+" sanity test gave back null", out);
|
||||||
|
assertTrue(t+" sanity test isn't TermQuery: " + out.getClass(),
|
||||||
|
out instanceof TermQuery);
|
||||||
|
assertEquals(t+" sanity test is wrong field", "sind",
|
||||||
|
((TermQuery)out).getTerm().field());
|
||||||
|
|
||||||
|
t = "subject:\"simple phrase\"";
|
||||||
|
out = qp.parse(t);
|
||||||
|
assertNotNull(t+" sanity test gave back null", out);
|
||||||
|
assertTrue(t+" sanity test isn't PhraseQuery: " + out.getClass(),
|
||||||
|
out instanceof PhraseQuery);
|
||||||
|
assertEquals(t+" sanity test is wrong field", "subject",
|
||||||
|
((PhraseQuery)out).getTerms()[0].field());
|
||||||
|
|
||||||
|
|
||||||
|
/* now some tests that use aliasing */
|
||||||
|
|
||||||
|
/* basic usage of single "term" */
|
||||||
|
t = "hoss:XXXXXXXX";
|
||||||
|
out = qp.parse(t);
|
||||||
|
assertNotNull(t+" was null", out);
|
||||||
|
assertTrue(t+" wasn't a DMQ:" + out.getClass(),
|
||||||
|
out instanceof DisjunctionMaxQuery);
|
||||||
|
assertEquals(t+" wrong number of clauses", 4,
|
||||||
|
countItems(((DisjunctionMaxQuery)out).iterator()));
|
||||||
|
|
||||||
|
|
||||||
|
/* odd case, but should still work, DMQ of one clause */
|
||||||
|
t = "test:YYYYY";
|
||||||
|
out = qp.parse(t);
|
||||||
|
assertNotNull(t+" was null", out);
|
||||||
|
assertTrue(t+" wasn't a DMQ:" + out.getClass(),
|
||||||
|
out instanceof DisjunctionMaxQuery);
|
||||||
|
assertEquals(t+" wrong number of clauses", 1,
|
||||||
|
countItems(((DisjunctionMaxQuery)out).iterator()));
|
||||||
|
|
||||||
|
/* basic usage of multiple "terms" */
|
||||||
|
t = "hoss:XXXXXXXX test:YYYYY";
|
||||||
|
out = qp.parse(t);
|
||||||
|
assertNotNull(t+" was null", out);
|
||||||
|
assertTrue(t+" wasn't a boolean:" + out.getClass(),
|
||||||
|
out instanceof BooleanQuery);
|
||||||
|
{
|
||||||
|
BooleanQuery bq = (BooleanQuery)out;
|
||||||
|
assertEquals(t+" wrong number of clauses", 2,
|
||||||
|
bq.getClauses().length);
|
||||||
|
Query sub = bq.getClauses()[0].getQuery();
|
||||||
|
assertTrue(t+" first wasn't a DMQ:" + sub.getClass(),
|
||||||
|
sub instanceof DisjunctionMaxQuery);
|
||||||
|
assertEquals(t+" first had wrong number of clauses", 4,
|
||||||
|
countItems(((DisjunctionMaxQuery)sub).iterator()));
|
||||||
|
sub = bq.getClauses()[1].getQuery();
|
||||||
|
assertTrue(t+" second wasn't a DMQ:" + sub.getClass(),
|
||||||
|
sub instanceof DisjunctionMaxQuery);
|
||||||
|
assertEquals(t+" second had wrong number of clauses", 1,
|
||||||
|
countItems(((DisjunctionMaxQuery)sub).iterator()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* a phrase, and a term that is a stop word for some fields */
|
||||||
|
t = "hoss:\"XXXXXX YYYYY\" hoss:the";
|
||||||
|
out = qp.parse(t);
|
||||||
|
assertNotNull(t+" was null", out);
|
||||||
|
assertTrue(t+" wasn't a boolean:" + out.getClass(),
|
||||||
|
out instanceof BooleanQuery);
|
||||||
|
{
|
||||||
|
BooleanQuery bq = (BooleanQuery)out;
|
||||||
|
assertEquals(t+" wrong number of clauses", 2,
|
||||||
|
bq.getClauses().length);
|
||||||
|
Query sub = bq.getClauses()[0].getQuery();
|
||||||
|
assertTrue(t+" first wasn't a DMQ:" + sub.getClass(),
|
||||||
|
sub instanceof DisjunctionMaxQuery);
|
||||||
|
assertEquals(t+" first had wrong number of clauses", 4,
|
||||||
|
countItems(((DisjunctionMaxQuery)sub).iterator()));
|
||||||
|
sub = bq.getClauses()[1].getQuery();
|
||||||
|
assertTrue(t+" second wasn't a DMQ:" + sub.getClass(),
|
||||||
|
sub instanceof DisjunctionMaxQuery);
|
||||||
|
assertEquals(t+" second had wrong number of clauses (stop words)", 2,
|
||||||
|
countItems(((DisjunctionMaxQuery)sub).iterator()));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int countItems(Iterator i) {
|
||||||
|
int count = 0;
|
||||||
|
while (i.hasNext()) {
|
||||||
|
count++;
|
||||||
|
i.next();
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testMinShouldMatchCalculator() {
|
||||||
|
|
||||||
|
/* zero is zero is zero */
|
||||||
|
assertEquals(0, calcMSM(5, "0"));
|
||||||
|
assertEquals(0, calcMSM(5, "0%"));
|
||||||
|
assertEquals(0, calcMSM(5, "-5"));
|
||||||
|
assertEquals(0, calcMSM(5, "-100%"));
|
||||||
|
|
||||||
|
/* basic integers */
|
||||||
|
assertEquals(3, calcMSM(5, "3"));
|
||||||
|
assertEquals(2, calcMSM(5, "-3"));
|
||||||
|
assertEquals(3, calcMSM(3, "3"));
|
||||||
|
assertEquals(0, calcMSM(3, "-3"));
|
||||||
|
assertEquals(3, calcMSM(3, "5"));
|
||||||
|
assertEquals(0, calcMSM(3, "-5"));
|
||||||
|
|
||||||
|
/* positive percentages with rounding */
|
||||||
|
assertEquals(0, calcMSM(3, "25%"));
|
||||||
|
assertEquals(1, calcMSM(4, "25%"));
|
||||||
|
assertEquals(1, calcMSM(5, "25%"));
|
||||||
|
assertEquals(2, calcMSM(10, "25%"));
|
||||||
|
|
||||||
|
/* negative percentages with rounding */
|
||||||
|
assertEquals(3, calcMSM(3, "-25%"));
|
||||||
|
assertEquals(3, calcMSM(4, "-25%"));
|
||||||
|
assertEquals(4, calcMSM(5, "-25%"));
|
||||||
|
assertEquals(8, calcMSM(10, "-25%"));
|
||||||
|
|
||||||
|
/* conditional */
|
||||||
|
assertEquals(1, calcMSM(1, "3<0"));
|
||||||
|
assertEquals(2, calcMSM(2, "3<0"));
|
||||||
|
assertEquals(3, calcMSM(3, "3<0"));
|
||||||
|
assertEquals(0, calcMSM(4, "3<0"));
|
||||||
|
assertEquals(0, calcMSM(5, "3<0"));
|
||||||
|
assertEquals(1, calcMSM(1, "3<25%"));
|
||||||
|
assertEquals(2, calcMSM(2, "3<25%"));
|
||||||
|
assertEquals(3, calcMSM(3, "3<25%"));
|
||||||
|
assertEquals(1, calcMSM(4, "3<25%"));
|
||||||
|
assertEquals(1, calcMSM(5, "3<25%"));
|
||||||
|
|
||||||
|
/* multiple conditionals */
|
||||||
|
assertEquals(1, calcMSM(1, "3<-25% 10<-3"));
|
||||||
|
assertEquals(2, calcMSM(2, "3<-25% 10<-3"));
|
||||||
|
assertEquals(3, calcMSM(3, "3<-25% 10<-3"));
|
||||||
|
assertEquals(3, calcMSM(4, "3<-25% 10<-3"));
|
||||||
|
assertEquals(4, calcMSM(5, "3<-25% 10<-3"));
|
||||||
|
assertEquals(5, calcMSM(6, "3<-25% 10<-3"));
|
||||||
|
assertEquals(6, calcMSM(7, "3<-25% 10<-3"));
|
||||||
|
assertEquals(6, calcMSM(8, "3<-25% 10<-3"));
|
||||||
|
assertEquals(7, calcMSM(9, "3<-25% 10<-3"));
|
||||||
|
assertEquals(8, calcMSM(10, "3<-25% 10<-3"));
|
||||||
|
assertEquals(8, calcMSM(11, "3<-25% 10<-3"));
|
||||||
|
assertEquals(9, calcMSM(12, "3<-25% 10<-3"));
|
||||||
|
assertEquals(97, calcMSM(100, "3<-25% 10<-3"));
|
||||||
|
|
||||||
|
BooleanQuery q = new BooleanQuery();
|
||||||
|
q.add(new TermQuery(new Term("a","b")), Occur.SHOULD);
|
||||||
|
q.add(new TermQuery(new Term("a","c")), Occur.SHOULD);
|
||||||
|
q.add(new TermQuery(new Term("a","d")), Occur.SHOULD);
|
||||||
|
q.add(new TermQuery(new Term("a","d")), Occur.SHOULD);
|
||||||
|
|
||||||
|
SolrPluginUtils.setMinShouldMatch(q, "0");
|
||||||
|
assertEquals(0, q.getMinimumNumberShouldMatch());
|
||||||
|
|
||||||
|
SolrPluginUtils.setMinShouldMatch(q, "1");
|
||||||
|
assertEquals(1, q.getMinimumNumberShouldMatch());
|
||||||
|
|
||||||
|
SolrPluginUtils.setMinShouldMatch(q, "50%");
|
||||||
|
assertEquals(2, q.getMinimumNumberShouldMatch());
|
||||||
|
|
||||||
|
SolrPluginUtils.setMinShouldMatch(q, "99");
|
||||||
|
assertEquals(4, q.getMinimumNumberShouldMatch());
|
||||||
|
|
||||||
|
q.add(new TermQuery(new Term("a","e")), Occur.MUST);
|
||||||
|
q.add(new TermQuery(new Term("a","f")), Occur.MUST);
|
||||||
|
|
||||||
|
SolrPluginUtils.setMinShouldMatch(q, "50%");
|
||||||
|
assertEquals(2, q.getMinimumNumberShouldMatch());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/** macro */
|
||||||
|
public String pe(CharSequence s) {
|
||||||
|
return SolrPluginUtils.partialEscape(s).toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** macro */
|
||||||
|
public String strip(CharSequence s) {
|
||||||
|
return SolrPluginUtils.stripUnbalancedQuotes(s).toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** macro */
|
||||||
|
public int calcMSM(int clauses, String spec) {
|
||||||
|
return SolrPluginUtils.calculateMinShouldMatch(clauses, spec);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -166,7 +166,23 @@
|
||||||
The "standard" request handler is the default and will be used if qt
|
The "standard" request handler is the default and will be used if qt
|
||||||
is not specified in the request.
|
is not specified in the request.
|
||||||
-->
|
-->
|
||||||
<requestHandler name="standard" class="solr.StandardRequestHandler" />
|
<requestHandler name="standard" class="solr.StandardRequestHandler"/>
|
||||||
|
<requestHandler name="dismax" class="solr.DisMaxRequestHandler" >
|
||||||
|
<float name="tie">0.01</float>
|
||||||
|
<str name="qf">
|
||||||
|
text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0
|
||||||
|
</str>
|
||||||
|
<str name="pf">
|
||||||
|
text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5
|
||||||
|
</str>
|
||||||
|
<str name="bf">
|
||||||
|
ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3
|
||||||
|
</str>
|
||||||
|
<str name="mm">
|
||||||
|
3<-1 5<-2 6<90%
|
||||||
|
</str>
|
||||||
|
<int name="ps">100</int>
|
||||||
|
</requestHandler>
|
||||||
<requestHandler name="old" class="solr.tst.OldRequestHandler" >
|
<requestHandler name="old" class="solr.tst.OldRequestHandler" >
|
||||||
<int name="myparam">1000</int>
|
<int name="myparam">1000</int>
|
||||||
<float name="ratio">1.4142135</float>
|
<float name="ratio">1.4142135</float>
|
||||||
|
|
Loading…
Reference in New Issue