SOLR-540: globbing in hl.fl

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@730008 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2008-12-29 22:48:07 +00:00
parent fd89aab62f
commit 8c1db20c9a
4 changed files with 159 additions and 60 deletions

View File

@ -128,6 +128,11 @@ New Features
27. SOLR-847: Enhance the snappull command in ReplicationHandler to accept masterUrl.
(Noble Paul, Preetam Rao via shalin)
28. SOLR-540: Add support for globbing in field names to highlight.
For example, hl.fl=*_text will highlight all fieldnames ending with
_text. (Lars Kotthoff via yonik)
Optimizations
----------------------
1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the

View File

@ -17,8 +17,11 @@ package org.apache.solr.highlight;
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -34,74 +37,87 @@ import org.apache.solr.util.SolrPluginUtils;
public abstract class SolrHighlighter
{
public static Logger log = LoggerFactory.getLogger(SolrHighlighter.class);
public static Logger log = LoggerFactory.getLogger(SolrHighlighter.class);
// Thread safe registry
protected final Map<String,SolrFormatter> formatters =
Collections.synchronizedMap( new HashMap<String, SolrFormatter>() );
// Thread safe registry
protected final Map<String,SolrFormatter> formatters =
Collections.synchronizedMap( new HashMap<String, SolrFormatter>() );
// Thread safe registry
protected final Map<String,SolrFragmenter> fragmenters =
Collections.synchronizedMap( new HashMap<String, SolrFragmenter>() );
// Thread safe registry
protected final Map<String,SolrFragmenter> fragmenters =
Collections.synchronizedMap( new HashMap<String, SolrFragmenter>() );
public abstract void initalize( final Config config );
public abstract void initalize( final Config config );
/**
* Check whether Highlighting is enabled for this request.
* @param params The params controlling Highlighting
* @return <code>true</code> if highlighting enabled, <code>false</code> if not.
*/
public boolean isHighlightingEnabled(SolrParams params) {
return params.getBool(HighlightParams.HIGHLIGHT, false);
}
/**
* Check whether Highlighting is enabled for this request.
* @param params The params controlling Highlighting
* @return <code>true</code> if highlighting enabled, <code>false</code> if not.
*/
public boolean isHighlightingEnabled(SolrParams params) {
return params.getBool(HighlightParams.HIGHLIGHT, false);
}
/**
* Return a String array of the fields to be highlighted.
* Falls back to the programatic defaults, or the default search field if the list of fields
* is not specified in either the handler configuration or the request.
* @param query The current Query
* @param request The current SolrQueryRequest
* @param defaultFields Programmatic default highlight fields, used if nothing is specified in the handler config or the request.
*/
public String[] getHighlightFields(Query query, SolrQueryRequest request, String[] defaultFields) {
String fields[] = request.getParams().getParams(HighlightParams.FIELDS);
/**
* Return a String array of the fields to be highlighted.
* Falls back to the programatic defaults, or the default search field if the list of fields
* is not specified in either the handler configuration or the request.
* @param query The current Query
* @param request The current SolrQueryRequest
* @param defaultFields Programmatic default highlight fields, used if nothing is specified in the handler config or the request.
*/
public String[] getHighlightFields(Query query, SolrQueryRequest request, String[] defaultFields) {
String fields[] = request.getParams().getParams(HighlightParams.FIELDS);
// if no fields specified in the request, or the handler, fall back to programmatic default, or default search field.
if(emptyArray(fields)) {
// use default search field if highlight fieldlist not specified.
if (emptyArray(defaultFields)) {
String defaultSearchField = request.getSchema().getSolrQueryParser(null).getField();
fields = null == defaultSearchField ? new String[]{} : new String[]{defaultSearchField};
}
else {
fields = defaultFields;
}
}
else if (fields.length == 1) {
// if there's a single request/handler value, it may be a space/comma separated list
fields = SolrPluginUtils.split(fields[0]);
}
// if no fields specified in the request, or the handler, fall back to programmatic default, or default search field.
if(emptyArray(fields)) {
// use default search field if highlight fieldlist not specified.
if (emptyArray(defaultFields)) {
String defaultSearchField = request.getSchema().getSolrQueryParser(null).getField();
fields = null == defaultSearchField ? new String[]{} : new String[]{defaultSearchField};
}
else {
fields = defaultFields;
}
}
else if (fields.length == 1) {
if (fields[0].contains("*")) {
// create a Java regular expression from the wildcard string
String fieldRegex = fields[0].replaceAll("\\*", ".*");
Collection<String> storedHighlightFieldNames = request.getSearcher().getStoredHighlightFieldNames();
List<String> storedFieldsToHighlight = new ArrayList<String>();
for (String storedFieldName: storedHighlightFieldNames) {
if (storedFieldName.matches(fieldRegex)) {
storedFieldsToHighlight.add(storedFieldName);
}
}
fields = storedFieldsToHighlight.toArray(new String[] {});
} else {
// if there's a single request/handler value, it may be a space/comma separated list
fields = SolrPluginUtils.split(fields[0]);
}
}
return fields;
}
return fields;
}
protected boolean emptyArray(String[] arr) {
return (arr == null || arr.length == 0 || arr[0] == null || arr[0].trim().length() == 0);
}
protected boolean emptyArray(String[] arr) {
return (arr == null || arr.length == 0 || arr[0] == null || arr[0].trim().length() == 0);
}
/**
* Generates a list of Highlighted query fragments for each item in a list
* of documents, or returns null if highlighting is disabled.
*
* @param docs query results
* @param query the query
* @param req the current request
* @param defaultFields default list of fields to summarize
*
* @return NamedList containing a NamedList for each document, which in
* turns contains sets (field, summary) pairs.
*/
@SuppressWarnings("unchecked")
public abstract NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException;
/**
* Generates a list of Highlighted query fragments for each item in a list
* of documents, or returns null if highlighting is disabled.
*
* @param docs query results
* @param query the query
* @param req the current request
* @param defaultFields default list of fields to summarize
*
* @return NamedList containing a NamedList for each document, which in
* turns contains sets (field, summary) pairs.
*/
@SuppressWarnings("unchecked")
public abstract NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException;
}

View File

@ -31,6 +31,7 @@ import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.request.UnInvertedField;
import org.apache.lucene.util.OpenBitSet;
@ -92,6 +93,9 @@ public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {
// list of all caches associated with this searcher.
private final SolrCache[] cacheList;
private static final SolrCache[] noCaches = new SolrCache[0];
private final Collection<String> fieldNames;
private Collection<String> storedHighlightFieldNames;
/** Creates a searcher searching the index in the named directory.
*
@ -179,6 +183,8 @@ public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {
// for DocSets
HASHSET_INVERSE_LOAD_FACTOR = solrConfig.hashSetInverseLoadFactor;
HASHDOCSET_MAXSIZE = solrConfig.hashDocSetMaxSize;
fieldNames = r.getFieldNames(IndexReader.FieldOption.ALL);
}
@ -233,6 +239,36 @@ public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {
public IndexReader getReader() { return reader; }
/** Direct access to the IndexSchema for use with this searcher */
public IndexSchema getSchema() { return schema; }
/**
* Returns a collection of all field names the index reader knows about.
*/
public Collection<String> getFieldNames() {
return fieldNames;
}
/**
* Returns a collection of the names of all stored fields which can be
* highlighted the index reader knows about.
*/
public Collection<String> getStoredHighlightFieldNames() {
if (storedHighlightFieldNames == null) {
storedHighlightFieldNames = new LinkedList<String>();
for (String fieldName : fieldNames) {
try {
SchemaField field = schema.getField(fieldName);
if (field.stored() &&
((field.getType() instanceof org.apache.solr.schema.TextField) ||
(field.getType() instanceof org.apache.solr.schema.StrField))) {
storedHighlightFieldNames.add(fieldName);
}
} catch (RuntimeException e) { // getField() throws a SolrException, but it arrives as a RuntimeException
log.warn("Field \"" + fieldName + "\" found in index, but not defined in schema.");
}
}
}
return storedHighlightFieldNames;
}
//
// Set default regenerators on filter and query caches if they don't have any
//

View File

@ -22,12 +22,15 @@ import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.util.*;
import org.apache.solr.common.params.HighlightParams;
import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
/**
* Tests some basic functionality of Solr while demonstrating good
@ -630,4 +633,43 @@ public class HighlighterTest extends AbstractSolrTestCase {
oldHighlight1, oldHighlight2, oldHighlight3
);
}
public void testGetHighlightFields() {
HashMap<String, String> args = new HashMap<String, String>();
args.put("fl", "id score");
args.put("hl", "true");
args.put("hl.fl", "t*");
assertU(adoc("id", "0", "title", "test", // static stored
"text", "test", // static not stored
"foo_s", "test", // dynamic stored
"foo_sI", "test", // dynamic not stored
"weight", "1.0")); // stored but not text
assertU(commit());
assertU(optimize());
TestHarness.LocalRequestFactory lrf = h.getRequestFactory("standard", 0,
10, args);
SolrQueryRequest request = lrf.makeRequest("test");
SolrHighlighter highlighter = request.getCore().getHighlighter();
List<String> highlightFieldNames = Arrays.asList(highlighter
.getHighlightFields(null, request, new String[] {}));
assertTrue("Expected to highlight on field \"title\"", highlightFieldNames
.contains("title"));
assertFalse("Expected to not highlight on field \"text\"",
highlightFieldNames.contains("text"));
assertFalse("Expected to not highlight on field \"weight\"",
highlightFieldNames.contains("weight"));
args.put("hl.fl", "foo_*");
lrf = h.getRequestFactory("standard", 0, 10, args);
request = lrf.makeRequest("test");
highlighter = request.getCore().getHighlighter();
highlightFieldNames = Arrays.asList(highlighter.getHighlightFields(null,
request, new String[] {}));
assertEquals("Expected one field to highlight on", 1, highlightFieldNames
.size());
assertEquals("Expected to highlight on field \"foo_s\"", "foo_s",
highlightFieldNames.get(0));
}
}