highlighting: SOLR-24

git-svn-id: https://svn.apache.org/repos/asf/incubator/solr/trunk@421678 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2006-07-13 18:22:24 +00:00
parent bf363aafc7
commit 1c06c77393
5 changed files with 380 additions and 273 deletions

View File

@ -18,14 +18,15 @@ New Features
10. copyField accepts dynamicfield-like names as the source. 10. copyField accepts dynamicfield-like names as the source.
(Darren Erik Vengroff via yonik, SOLR-21) (Darren Erik Vengroff via yonik, SOLR-21)
11. new DocSet.andNot(), DocSet.andNotSize() (yonik) 11. new DocSet.andNot(), DocSet.andNotSize() (yonik)
12. Ability to store term vectors. (Note: standard request handler does 12. Ability to store term vectors for fields. (Mike Klaas via yonik, SOLR-23)
not currently do anything with term vectors) (Mike Klaas via yonik, SOLR-23)
13. New abstract BufferedTokenStream for people who want to write 13. New abstract BufferedTokenStream for people who want to write
Tokenizers or TokenFilters that require arbitrary buffering of the Tokenizers or TokenFilters that require arbitrary buffering of the
stream. (SOLR-11 / yonik, hossman) stream. (SOLR-11 / yonik, hossman)
14. New RemoveDuplicatesToken - useful in situations where 14. New RemoveDuplicatesToken - useful in situations where
synonyms, stemming, or word-deliminater-ing produce identical tokens at synonyms, stemming, or word-deliminater-ing produce identical tokens at
the same position. (SOLR-11 / yonik, hossman) the same position. (SOLR-11 / yonik, hossman)
15. Added highlighting to SolrPluginUtils and implemented in StandardRequestHandler
and DisMaxRequestHandler (SOLR-24 / Mike Klaas via hossman,yonik)
Changes in runtime behavior Changes in runtime behavior
1. classes reorganized into different packages, package names changed to Apache 1. classes reorganized into different packages, package names changed to Apache

View File

@ -41,6 +41,7 @@ import org.apache.solr.schema.FieldType;
import org.apache.solr.util.StrUtils; import org.apache.solr.util.StrUtils;
import org.apache.solr.util.NamedList; import org.apache.solr.util.NamedList;
import org.apache.solr.util.SolrPluginUtils; import org.apache.solr.util.SolrPluginUtils;
import org.apache.solr.util.DisMaxParams;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
@ -161,7 +162,7 @@ public class DisMaxRequestHandler
/* :NOOP */ /* :NOOP */
} }
protected final U.CommonParams params = new U.CommonParams(); protected final DisMaxParams params = new DisMaxParams();
public DisMaxRequestHandler() { public DisMaxRequestHandler() {
super(); super();
@ -219,6 +220,7 @@ public class DisMaxRequestHandler
try { try {
int flags = 0;
SolrIndexSearcher s = req.getSearcher(); SolrIndexSearcher s = req.getSearcher();
IndexSchema schema = req.getSchema(); IndexSchema schema = req.getSchema();
@ -267,7 +269,7 @@ public class DisMaxRequestHandler
if (dis instanceof BooleanQuery) { if (dis instanceof BooleanQuery) {
BooleanQuery t = new BooleanQuery(); BooleanQuery t = new BooleanQuery();
U.flatenBooleanQuery(t, (BooleanQuery)dis); U.flattenBooleanQuery(t, (BooleanQuery)dis);
U.setMinShouldMatch(t, minShouldMatch); U.setMinShouldMatch(t, minShouldMatch);
@ -332,19 +334,19 @@ public class DisMaxRequestHandler
/* * * Generate Main Results * * */ /* * * Generate Main Results * * */
flags |= U.setReturnFields(U.getParam(req, params.FL, params.fl), rsp);
DocList results = s.getDocList(query, restrictions, DocList results = s.getDocList(query, restrictions,
SolrPluginUtils.getSort(req), SolrPluginUtils.getSort(req),
req.getStart(), req.getLimit(), req.getStart(), req.getLimit(),
SolrIndexSearcher.GET_SCORES); flags);
rsp.add("search-results",results); rsp.add("search-results",results);
U.setReturnFields(U.getParam(req, params.FL, params.fl), rsp);
/* * * Debugging Info * * */ /* * * Debugging Info * * */
try { try {
NamedList debug = U.doStandardDebug(req, userQuery, query, results); NamedList debug = U.doStandardDebug(req, userQuery, query, results, params);
if (null != debug) { if (null != debug) {
debug.add("boostquery", boostQuery); debug.add("boostquery", boostQuery);
debug.add("boostfunc", boostFunc); debug.add("boostfunc", boostFunc);
@ -364,6 +366,18 @@ public class DisMaxRequestHandler
rsp.add("exception_during_debug", SolrException.toStr(e)); rsp.add("exception_during_debug", SolrException.toStr(e));
} }
/* * * Highlighting/Summarizing * * */
if(U.getBooleanParam(req, params.HIGHLIGHT, params.highlight)) {
BooleanQuery highlightQuery = new BooleanQuery();
U.flattenBooleanQuery(highlightQuery, query);
NamedList sumData = U.doStandardHighlighting(results, highlightQuery,
req, params,
queryFields.keySet().toArray(new String[0]));
if(sumData != null)
rsp.add("highlighting", sumData);
}
} catch (Exception e) { } catch (Exception e) {
SolrException.log(SolrCore.log,e); SolrException.log(SolrCore.log,e);
rsp.setException(e); rsp.setException(e);

View File

@ -29,6 +29,8 @@ import java.net.URL;
import org.apache.solr.util.StrUtils; import org.apache.solr.util.StrUtils;
import org.apache.solr.util.NamedList; import org.apache.solr.util.NamedList;
import org.apache.solr.util.SolrPluginUtils;
import org.apache.solr.util.CommonParams;
import org.apache.solr.search.*; import org.apache.solr.search.*;
import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchema;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
@ -47,14 +49,18 @@ public class StandardRequestHandler implements SolrRequestHandler, SolrInfoMBean
long numRequests; long numRequests;
long numErrors; long numErrors;
/** shorten the class references for utilities */
private static class U extends SolrPluginUtils {
/* :NOOP */
}
/** parameters garnered from config file */
protected final CommonParams params = new CommonParams();
public void init(NamedList args) { public void init(NamedList args) {
SolrCore.log.log(Level.INFO, "Unused request handler arguments:" + args); params.setValues(args);
} }
private final Pattern splitList=Pattern.compile(",| ");
public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) { public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
numRequests++; numRequests++;
@ -63,24 +69,14 @@ public class StandardRequestHandler implements SolrRequestHandler, SolrInfoMBean
// we need to un-escape them before we pass to QueryParser // we need to un-escape them before we pass to QueryParser
try { try {
String sreq = req.getQueryString(); String sreq = req.getQueryString();
String debug = req.getParam("debugQuery"); String debug = U.getParam(req, params.DEBUG_QUERY, params.debugQuery);
String defaultField = req.getParam("df"); String defaultField = U.getParam(req, params.DF, params.df);
// find fieldnames to return (fieldlist) // find fieldnames to return (fieldlist)
String fl = req.getParam("fl"); String fl = U.getParam(req, params.FL, params.fl);
int flags=0; int flags = 0;
if (fl != null) { if (fl != null) {
// TODO - this could become more efficient if widely used. flags |= U.setReturnFields(fl, rsp);
// TODO - should field order be maintained?
String[] flst = splitList.split(fl,0);
if (flst.length > 0 && !(flst.length==1 && flst[0].length()==0)) {
Set<String> set = new HashSet<String>();
for (String fname : flst) {
if ("score".equals(fname)) flags |= SolrIndexSearcher.GET_SCORES;
set.add(fname);
}
rsp.setReturnFields(set);
}
} }
if (sreq==null) throw new SolrException(400,"Missing queryString"); if (sreq==null) throw new SolrException(400,"Missing queryString");
@ -104,25 +100,20 @@ public class StandardRequestHandler implements SolrRequestHandler, SolrInfoMBean
DocList results = req.getSearcher().getDocList(query, null, sort, req.getStart(), req.getLimit(), flags); DocList results = req.getSearcher().getDocList(query, null, sort, req.getStart(), req.getLimit(), flags);
rsp.add(null,results); rsp.add(null,results);
if (debug!=null) {
NamedList dbg = new NamedList();
try { try {
dbg.add("querystring",qs); NamedList dbg = U.doStandardDebug(req, qs, query, results, params);
dbg.add("parsedquery",QueryParsing.toString(query,req.getSchema())); if (null != dbg)
dbg.add("explain", getExplainList(query, results, req.getSearcher(), req.getSchema())); rsp.add("debug", dbg);
String otherQueryS = req.getParam("explainOther");
if (otherQueryS != null && otherQueryS.length() > 0) {
DocList otherResults = doQuery(otherQueryS,req.getSearcher(), req.getSchema(),0,10);
dbg.add("otherQuery",otherQueryS);
dbg.add("explainOther", getExplainList(query, otherResults, req.getSearcher(), req.getSchema()));
}
} catch (Exception e) { } catch (Exception e) {
SolrException.logOnce(SolrCore.log,"Exception during debug:",e); SolrException.logOnce(SolrCore.log, "Exception durring debug", e);
dbg.add("exception_during_debug", SolrException.toStr(e)); rsp.add("exception_during_debug", SolrException.toStr(e));
}
rsp.add("debug",dbg);
} }
NamedList sumData = SolrPluginUtils.doStandardHighlighting(
results, query, req, params, new String[]{defaultField});
if(sumData != null)
rsp.add("highlighting", sumData);
} catch (SolrException e) { } catch (SolrException e) {
rsp.setException(e); rsp.setException(e);
numErrors++; numErrors++;
@ -135,52 +126,6 @@ public class StandardRequestHandler implements SolrRequestHandler, SolrInfoMBean
} }
} }
/**
 * Builds one explanation entry per document in <code>results</code>.
 *
 * Each entry is keyed by <code>internal_docid=N</code>, prefixed with
 * <code>id=KEY,</code> when the schema yields a printable unique key for the
 * document; the value is the Explanation text preceded by a newline.
 *
 * @param query the query to explain against each document
 * @param results the documents to explain
 * @param searcher used to compute explanations and load documents
 * @param schema used to obtain the printable unique key
 * @return NamedList of (document label, explanation text) pairs
 */
private NamedList getExplainList(Query query, DocList results, SolrIndexSearcher searcher, IndexSchema schema) throws IOException {
  final NamedList explanations = new NamedList();
  final DocIterator docs = results.iterator();

  int seen = 0;
  while (seen < results.size()) {
    final int docId = docs.nextDoc();
    final Explanation explanation = searcher.explain(query, docId);

    final String uniqueKey = schema.printableUniqueKey(searcher.doc(docId));
    final String prefix = (uniqueKey == null) ? "" : "id=" + uniqueKey + ",";

    explanations.add(prefix + "internal_docid=" + docId,
                     "\n" + explanation.toString());
    seen++;
  }
  return explanations;
}
/**
 * Runs a query given in "query;sort" command syntax.
 *
 * The request string is split on ';'. The first command (or the empty
 * string when there is none) is parsed as the query; the second, when
 * present, is parsed as a sort specification. A sort specification with a
 * non-negative count replaces <code>limit</code>.
 *
 * @param sreq the raw request string
 * @param searcher searcher to execute against
 * @param schema schema used for query and sort parsing
 * @param start offset of the first document to return
 * @param limit maximum number of documents, unless the sort spec overrides it
 * @return the matching documents
 */
private DocList doQuery(String sreq, SolrIndexSearcher searcher, IndexSchema schema, int start, int limit) throws IOException {
  final List<String> commands = StrUtils.splitSmart(sreq, ';');

  final String queryText = commands.isEmpty() ? "" : commands.get(0);
  final Query query = QueryParsing.parseQuery(queryText, schema);

  // A simple sort on an indexed field can be handed straight to Lucene.
  Sort sort = null;
  int rows = limit;
  if (commands.size() >= 2) {
    final QueryParsing.SortSpec spec = QueryParsing.parseSort(commands.get(1), schema);
    if (spec != null) {
      sort = spec.getSort();
      if (spec.getCount() >= 0) {
        rows = spec.getCount();
      }
    }
  }

  return searcher.getDocList(query, (DocSet) null, sort, start, rows);
}
//////////////////////// SolrInfoMBeans methods ////////////////////// //////////////////////// SolrInfoMBeans methods //////////////////////

View File

@ -16,6 +16,7 @@
package org.apache.solr.util; package org.apache.solr.util;
import org.apache.solr.core.Config; // highlighting
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoMBean; import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.core.SolrException; import org.apache.solr.core.SolrException;
@ -40,6 +41,7 @@ import org.apache.solr.schema.FieldType;
import org.apache.solr.util.StrUtils; import org.apache.solr.util.StrUtils;
import org.apache.solr.util.NamedList; import org.apache.solr.util.NamedList;
import org.apache.solr.util.XML;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
@ -53,9 +55,22 @@ import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.ConstantScoreRangeQuery; import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.highlight.Highlighter; // highlighting
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.NullFragmenter;
import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Token;
import org.xmlpull.v1.XmlPullParserException; import org.xmlpull.v1.XmlPullParserException;
@ -73,6 +88,8 @@ import java.util.Map;
import java.util.HashMap; import java.util.HashMap;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter; // highlighting
import java.net.URL; import java.net.URL;
/** /**
@ -84,6 +101,9 @@ import java.net.URL;
* </p> * </p>
* *
* <p>:TODO: refactor StandardRequestHandler to use these utilities</p> * <p>:TODO: refactor StandardRequestHandler to use these utilities</p>
*
* <p>:TODO: Many "standard" functionality methods are not cognisant of
* default parameter settings.</p>
*/ */
public class SolrPluginUtils { public class SolrPluginUtils {
@ -108,6 +128,8 @@ public class SolrPluginUtils {
String param, String def) { String param, String def) {
String v = req.getParam(param); String v = req.getParam(param);
// Note: parameters that are passed but contain only white-space are
// considered equivalent to passing nothing for that parameter.
if (null == v || "".equals(v.trim())) { if (null == v || "".equals(v.trim())) {
return def; return def;
} }
@ -134,7 +156,18 @@ public class SolrPluginUtils {
return r; return r;
} }
/**
 * Interprets a request parameter as a boolean flag.
 *
 * A missing, empty or white-space-only value yields <code>def</code>.
 * Otherwise the trimmed value is compared (case-sensitively) with the
 * string 'false': that exact string is false, anything else is true.
 *
 * @param req the request to read the parameter from
 * @param param name of the parameter
 * @param def value to use when the parameter is absent or blank
 */
public static boolean getBooleanParam(SolrQueryRequest req,
                                      String param, boolean def) {
  final String raw = req.getParam(param);
  if (raw == null) {
    return def;
  }
  final String trimmed = raw.trim();
  if (trimmed.length() == 0) {
    return def;
  }
  return !trimmed.equals("false");
}
private final static Pattern splitList=Pattern.compile(",| "); private final static Pattern splitList=Pattern.compile(",| ");
@ -142,29 +175,36 @@ public class SolrPluginUtils {
* Assumes the standard query param of "fl" to specify the return fields * Assumes the standard query param of "fl" to specify the return fields
* @see #setReturnFields(String,SolrQueryResponse) * @see #setReturnFields(String,SolrQueryResponse)
*/ */
public static void setReturnFields(SolrQueryRequest req, public static int setReturnFields(SolrQueryRequest req,
SolrQueryResponse res) { SolrQueryResponse res) {
setReturnFields(req.getParam(FL), res); return setReturnFields(req.getParam(FL), res);
} }
/** /**
* Given a space seperated list of field names, sets the field list on the * Given a space seperated list of field names, sets the field list on the
* SolrQueryResponse. * SolrQueryResponse.
*
* @return bitfield of SolrIndexSearcher flags that need to be set
*/ */
public static void setReturnFields(String fl, public static int setReturnFields(String fl,
SolrQueryResponse res) { SolrQueryResponse res) {
int flags = 0;
if (fl != null) { if (fl != null) {
// TODO - this could become more efficient if widely used. // TODO - this could become more efficient if widely used.
// TODO - should field order be maintained? // TODO - should field order be maintained?
String[] flst = splitList.split(fl.trim(),0); String[] flst = splitList.split(fl.trim(),0);
if (flst.length > 0 && !(flst.length==1 && flst[0].length()==0)) { if (flst.length > 0 && !(flst.length==1 && flst[0].length()==0)) {
Set<String> set = new HashSet<String>(); Set<String> set = new HashSet<String>();
for (String fname : flst) set.add(fname); for (String fname : flst) {
if("score".equalsIgnoreCase(fname))
flags |= SolrIndexSearcher.GET_SCORES;
set.add(fname);
}
res.setReturnFields(set); res.setReturnFields(set);
} }
} }
return flags;
} }
/** /**
@ -201,24 +241,24 @@ public class SolrPluginUtils {
* @param query the query built from the userQuery * @param query the query built from the userQuery
* (and perhaps other clauses) that identifies the main * (and perhaps other clauses) that identifies the main
* result set of the response. * result set of the response.
* @param results the main result set of hte response * @param results the main result set of the response
*/ */
public static NamedList doStandardDebug(SolrQueryRequest req, public static NamedList doStandardDebug(SolrQueryRequest req,
String userQuery, String userQuery,
Query query, Query query,
DocList results) DocList results,
CommonParams params)
throws IOException { throws IOException {
String debug = getParam(req, params.DEBUG_QUERY, params.debugQuery);
String debug = req.getParam("debugQuery");
NamedList dbg = null; NamedList dbg = null;
if (debug!=null) { if (debug!=null) {
dbg = new NamedList(); dbg = new NamedList();
/* userQuery may have been pre-processes .. expose that */ /* userQuery may have been pre-processes .. expose that */
dbg.add("rawquerystring",req.getQueryString()); dbg.add("rawquerystring", req.getQueryString());
dbg.add("querystring",userQuery); dbg.add("querystring", userQuery);
/* QueryParsing.toString isn't perfect, use it to see converted /* QueryParsing.toString isn't perfect, use it to see converted
* values, use regular toString to see any attributes of the * values, use regular toString to see any attributes of the
@ -274,6 +314,177 @@ public class SolrPluginUtils {
return explainList; return explainList;
} }
/**
 * Builds the stock Highlighter for a query: matches are wrapped in
 * <code>&lt;em&gt;</code> tags, scoring is done by a QueryScorer over the
 * query, and text is split into fragments by a GapFragmenter.
 *
 * @param query Query instance
 * @return a newly constructed Highlighter
 */
public static Highlighter getDefaultHighlighter(Query query) {
  final Formatter emphasis = new SimpleHTMLFormatter("<em>", "</em>");
  final Highlighter result = new Highlighter(emphasis, new QueryScorer(query));
  result.setTextFragmenter(new GapFragmenter());
  return result;
}
/**
 * Convenience overload that highlights with the default Highlighter for
 * <code>query</code> (see {@link #getDefaultHighlighter}) and otherwise
 * behaves like the overload taking an explicit Highlighter.
 *
 * @param docs query results
 * @param fieldNames list of fields to summarize
 * @param query resulting query object
 * @param searcher the SolrIndexSearcher corresponding to a request
 * @param numFragments maximum number of summary fragments to return for
 *        a given field
 * @return the per-document summaries produced by the delegate overload
 */
public static NamedList getHighlights(DocList docs,
                                      String[] fieldNames,
                                      Query query,
                                      SolrIndexSearcher searcher,
                                      int numFragments
                                      ) throws IOException {
  final Highlighter defaultHighlighter = getDefaultHighlighter(query);
  return getHighlights(docs, fieldNames, searcher,
                       defaultHighlighter, numFragments);
}
/**
 * Generates a list of highlighted query fragments for each item in a list
 * of documents.
 *
 * For every document and every requested field that has stored values:
 * a single-valued field is tokenized from its term vectors when
 * available, falling back to re-analysis of the stored text; a field with
 * any other number of values is tokenized through a MultiValueTokenStream
 * over all values. Only non-null fragments with a score above zero are
 * reported, and a field with no such fragment is omitted.
 *
 * @param docs query results
 * @param fieldNames list of fields to summarize (names are trimmed)
 * @param searcher the SolrIndexSearcher corresponding to a request
 * @param highlighter a customized Highlighter instance
 * @param numFragments maximum number of summary fragments to return for
 *        a given field
 *
 * @return NamedList containing a NamedList for each document (keyed by the
 *         schema's printable unique key, which may be null), which in
 *         turn contains (field, String[] summaries) pairs.
 */
public static NamedList getHighlights(DocList docs,
                                      String[] fieldNames,
                                      SolrIndexSearcher searcher,
                                      Highlighter highlighter,
                                      int numFragments
                                      ) throws IOException {
  NamedList fragments = new NamedList();
  DocIterator iterator = docs.iterator();
  for (int i = 0; i < docs.size(); i++) {
    int docId = iterator.nextDoc();
    // use the Searcher's doc cache
    Document doc = searcher.doc(docId);
    NamedList docSummaries = new NamedList();

    for (String fieldName : fieldNames) {
      fieldName = fieldName.trim();
      String[] docTexts = doc.getValues(fieldName);
      if (docTexts == null) {
        continue;
      }

      TextFragment[] frag;
      if (docTexts.length == 1) {
        // single-valued field
        TokenStream tstream;
        try {
          // attempt term vectors
          tstream = TokenSources.getTokenStream(
            searcher.getReader(), docId, fieldName);
        } catch (IllegalArgumentException e) {
          // deliberate fallback, not a swallowed error: re-analyze the
          // stored text when the term-vector lookup rejects the field
          tstream = searcher.getSchema().getAnalyzer().tokenStream(
            fieldName, new StringReader(docTexts[0]));
        }
        frag = highlighter.getBestTextFragments(
          tstream, docTexts[0], false, numFragments);
      } else {
        // multi-valued field
        MultiValueTokenStream tstream =
          new MultiValueTokenStream(fieldName,
                                    docTexts,
                                    searcher.getSchema().getAnalyzer());
        frag = highlighter.getBestTextFragments(
          tstream, tstream.asSingleValue(), false, numFragments);
      }

      // convert fragments back into text
      // TODO: we can include score and position information in output as
      // snippet attributes
      if (frag.length > 0) {
        ArrayList<String> fragTexts = new ArrayList<String>();
        for (TextFragment candidate : frag) {
          if (candidate != null && candidate.getScore() > 0) {
            fragTexts.add(candidate.toString());
          }
        }
        if (!fragTexts.isEmpty()) {
          docSummaries.add(fieldName,
                           fragTexts.toArray(new String[fragTexts.size()]));
        }
      }
    }

    fragments.add(searcher.getSchema().printableUniqueKey(doc), docSummaries);
  }
  return fragments;
}
/**
 * Perform highlighting of selected fields.
 *
 * The field list comes from the highlight-fields parameter when it is
 * non-blank; otherwise <code>defaultFields</code> is used, and when that is
 * null, empty or starts with a null entry the schema's default search
 * field is used instead. A non-empty formatter-class parameter selects a
 * custom Formatter; otherwise the default Highlighter is used.
 *
 * @param docs query results
 * @param query the (possibly re-written query)
 * @param req associated SolrQueryRequest
 * @param params parameter names and configured defaults consulted for the
 *        highlight switch, field list, formatter class and snippet count
 * @param defaultFields default search field list
 *
 * @return NamedList containing summary data, or null if highlighting is
 *         disabled.
 *
 */
public static NamedList doStandardHighlighting(DocList docs,
                                               Query query,
                                               SolrQueryRequest req,
                                               CommonParams params,
                                               String[] defaultFields
                                               ) throws IOException {
  final boolean enabled =
    getBooleanParam(req, params.HIGHLIGHT, params.highlight);
  if (!enabled) {
    return null;
  }

  // Resolve which fields to summarize.
  final String fieldParam = getParam(req, params.HIGHLIGHT_FIELDS,
                                     params.highlightFields);
  final String[] fields;
  if (fieldParam != null && !fieldParam.trim().equals("")) {
    fields = splitList.split(fieldParam.trim());
  } else if (defaultFields != null && defaultFields.length > 0
             && defaultFields[0] != null) {
    fields = defaultFields;
  } else {
    // use default search field if highlight fieldlist not specified.
    fields = new String[] { req.getSchema().getDefaultSearchFieldName() };
  }

  // Resolve the highlighter, honouring a custom formatter class if given.
  final String formatterSpec = getParam(req, params.HIGHLIGHT_FORMATTER_CLASS,
                                        params.highlightFormatterClass);
  final Highlighter highlighter;
  if (formatterSpec != null && !formatterSpec.equals("")) {
    final Formatter custom = (Formatter) Config.newInstance(formatterSpec);
    highlighter = new Highlighter(custom, new QueryScorer(query));
    highlighter.setTextFragmenter(new GapFragmenter());
  } else {
    highlighter = getDefaultHighlighter(query);
  }

  final int numFragments = getNumberParam(req, params.MAX_SNIPPETS,
                                          params.maxSnippets).intValue();

  return getHighlights(docs, fields, req.getSearcher(),
                       highlighter, numFragments);
}
/** /**
* Executes a basic query in lucene syntax * Executes a basic query in lucene syntax
*/ */
@ -455,7 +666,7 @@ public class SolrPluginUtils {
* so do not attempt to reuse it. * so do not attempt to reuse it.
* </p> * </p>
*/ */
public static void flatenBooleanQuery(BooleanQuery to, BooleanQuery from) { public static void flattenBooleanQuery(BooleanQuery to, BooleanQuery from) {
BooleanClause[] c = from.getClauses(); BooleanClause[] c = from.getClauses();
for (int i = 0; i < c.length; i++) { for (int i = 0; i < c.length; i++) {
@ -468,7 +679,7 @@ public class SolrPluginUtils {
&& !c[i].isProhibited()) { && !c[i].isProhibited()) {
/* we can recurse */ /* we can recurse */
flatenBooleanQuery(to, (BooleanQuery)ci); flattenBooleanQuery(to, (BooleanQuery)ci);
} else { } else {
to.add(c[i]); to.add(c[i]);
@ -512,169 +723,6 @@ public class SolrPluginUtils {
return s.toString().replace("\"",""); return s.toString().replace("\"","");
} }
/**
 * A collection of common params, both for Plugin initialization and
 * for Requests.
 */
public static class CommonParams {

  /** query and init param for tiebreaker value */
  public static String TIE = "tie";
  /** query and init param for query fields */
  public static String QF = "qf";
  /** query and init param for phrase boost fields */
  public static String PF = "pf";
  /** query and init param for MinShouldMatch specification */
  public static String MM = "mm";
  /** query and init param for Phrase Slop value */
  public static String PS = "ps";
  /** query and init param for boosting query */
  public static String BQ = "bq";
  /** query and init param for boosting functions */
  public static String BF = "bf";
  /** query and init param for filtering query */
  public static String FQ = "fq";
  /** query and init param for field list */
  public static String FL = "fl";
  /**
   * query and init param named "gen".
   * NOTE(review): the previous comment was a copy of the FL description;
   * the purpose of this param is not visible in this class, and
   * {@link #setValues} does not read it.
   */
  public static String GEN = "gen";

  /** the default tie breaker to use in DisjunctionMaxQueries */
  public float tiebreaker = 0.0f;
  /** the default query fields to be used */
  public String qf = null;
  /** the default phrase boosting fields to be used */
  public String pf = null;
  /** the default min should match to be used */
  public String mm = "100%";
  /** the default phrase slop to be used */
  public int pslop = 0;
  /** the default boosting query to be used */
  public String bq = null;
  /** the default boosting functions to be used */
  public String bf = null;
  /** the default filtering query to be used */
  public String fq = null;
  /** the default field list to be used */
  public String fl = null;

  public CommonParams() {
    /* :NOOP: */
  }

  /** @see #setValues */
  public CommonParams(NamedList args) {
    this();
    setValues(args);
  }

  /**
   * Sets the params using values from a NamedList, useful in the
   * init method for your handler.
   *
   * <p>
   * If any param is not of the expected type, a severe error is
   * logged, and the param is skipped.
   * </p>
   *
   * <p>
   * If any param is not in the NamedList, it is skipped and the
   * old value is left alone.
   * </p>
   *
   */
  public void setValues(NamedList args) {
    // Params are processed in a fixed order so that any type-mismatch
    // log lines appear in the same sequence as before.
    Object tmp = args.get(TIE);
    if (null != tmp) {
      if (tmp instanceof Float) {
        tiebreaker = ((Float)tmp).floatValue();
      } else {
        SolrCore.log.severe("init param is not a float: " + TIE);
      }
    }

    qf = stringArg(args, QF, qf);
    pf = stringArg(args, PF, pf);
    mm = stringArg(args, MM, mm);

    tmp = args.get(PS);
    if (null != tmp) {
      if (tmp instanceof Integer) {
        pslop = ((Integer)tmp).intValue();
      } else {
        SolrCore.log.severe("init param is not an int: " + PS);
      }
    }

    bq = stringArg(args, BQ, bq);
    bf = stringArg(args, BF, bf);
    fq = stringArg(args, FQ, fq);
    fl = stringArg(args, FL, fl);
  }

  /**
   * Looks up a string-valued init param: returns its value when present
   * and a String, otherwise returns <code>current</code> (logging a severe
   * error if a value was present but had another type).
   */
  private static String stringArg(NamedList args, String name, String current) {
    Object tmp = args.get(name);
    if (null == tmp) {
      return current;
    }
    if (tmp instanceof String) {
      return tmp.toString();
    }
    SolrCore.log.severe("init param is not a str: " + name);
    return current;
  }
}
/** /**
* A subclass of SolrQueryParser that supports aliasing fields for * A subclass of SolrQueryParser that supports aliasing fields for
* constructing DisjunctionMaxQueries. * constructing DisjunctionMaxQueries.
@ -763,8 +811,6 @@ public class SolrPluginUtils {
} }
/** /**
* Determines the correct Sort based on the request parameter "sort" * Determines the correct Sort based on the request parameter "sort"
* *
@ -818,6 +864,105 @@ public class SolrPluginUtils {
} }
} }
}
/**
* Helper class which creates a single TokenStream out of values from a
* multi-valued field.
*/
class MultiValueTokenStream extends TokenStream {
private String fieldName;
private String[] values;
private Analyzer analyzer;
private int curIndex; // next index into the values array
private int curOffset; // offset into concatenated string
private TokenStream currentStream; // tokenStream currently being iterated
/** Constructs a TokenStream for consecutively-analyzed field values
*
* @param fieldName name of the field
* @param values array of field data
* @param analyzer analyzer instance
*/
public MultiValueTokenStream(String fieldName, String[] values,
Analyzer analyzer) {
this.fieldName = fieldName;
this.values = values;
this.analyzer = analyzer;
curIndex = -1;
curOffset = 0;
currentStream = null;
}
/** Returns the next token in the stream, or null at EOS. */
public Token next() throws IOException {
int extra = 0;
if(currentStream == null) {
curIndex++;
if(curIndex < values.length) {
currentStream = analyzer.tokenStream(fieldName,
new StringReader(values[curIndex]));
// add extra space between multiple values
if(curIndex > 0)
extra = analyzer.getPositionIncrementGap(fieldName);
} else {
return null;
}
}
Token nextToken = currentStream.next();
if(nextToken == null) {
curOffset += values[curIndex].length();
currentStream = null;
return next();
}
// create an modified token which is the offset into the concatenated
// string of all values
Token offsetToken = new Token(nextToken.termText(),
nextToken.startOffset() + curOffset,
nextToken.endOffset() + curOffset);
offsetToken.setPositionIncrement(nextToken.getPositionIncrement() + extra*10);
return offsetToken;
}
/**
* Returns all values as a single String into which the Tokens index with
* their offsets.
*/
public String asSingleValue() {
StringBuilder sb = new StringBuilder();
for(String str : values)
sb.append(str);
return sb.toString();
}
} }
/**
 * A SimpleFragmenter variant that also breaks fragments at unusually large
 * position increments, which behaves much better in the presence of
 * multi-valued fields.
 *
 * A token starts a new fragment when its end offset is at least
 * <code>getFragmentSize()</code> past the end offset recorded at the previous
 * break, or when its position increment exceeds
 * {@link #INCREMENT_THRESHOLD}.
 */
class GapFragmenter extends SimpleFragmenter {
  public static final int INCREMENT_THRESHOLD = 50;
  protected int fragOffsetAccum = 0;

  /* (non-Javadoc)
   * @see org.apache.lucene.search.highlight.TextFragmenter#start(java.lang.String)
   */
  public void start(String originalText) {
    fragOffsetAccum = 0;
  }

  /* (non-Javadoc)
   * @see org.apache.lucene.search.highlight.TextFragmenter#isNewFragment(org.apache.lucene.analysis.Token)
   */
  public boolean isNewFragment(Token token) {
    final boolean withinFragment =
      token.endOffset() < fragOffsetAccum + getFragmentSize();
    if (withinFragment && token.getPositionIncrement() <= INCREMENT_THRESHOLD) {
      return false;
    }
    // remember where this fragment boundary fell
    fragOffsetAccum = token.endOffset();
    return true;
  }
}

View File

@ -339,6 +339,8 @@
<dynamicField name="*_sI" type="string" indexed="true" stored="false"/> <dynamicField name="*_sI" type="string" indexed="true" stored="false"/>
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/> <dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
<dynamicField name="t_*" type="text" indexed="true" stored="true"/> <dynamicField name="t_*" type="text" indexed="true" stored="true"/>
<dynamicField name="tv_*" type="text" indexed="true" stored="true"
termVectors="true" termPositions="true" termOffsets="true"/>
<!-- for testing to ensure that longer patterns are matched first --> <!-- for testing to ensure that longer patterns are matched first -->