SOLR-540: globbing in hl.fl

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@730008 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2008-12-29 22:48:07 +00:00
parent fd89aab62f
commit 8c1db20c9a
4 changed files with 159 additions and 60 deletions

View File

@ -128,6 +128,11 @@ New Features
27. SOLR-847: Enhance the snappull command in ReplicationHandler to accept masterUrl. 27. SOLR-847: Enhance the snappull command in ReplicationHandler to accept masterUrl.
(Noble Paul, Preetam Rao via shalin) (Noble Paul, Preetam Rao via shalin)
28. SOLR-540: Add support for globbing in the names of the fields to highlight.
For example, hl.fl=*_text will highlight all field names ending with
_text. (Lars Kotthoff via yonik)
Optimizations Optimizations
---------------------- ----------------------
1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the 1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the

View File

@ -17,8 +17,11 @@ package org.apache.solr.highlight;
*/ */
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -34,74 +37,87 @@ import org.apache.solr.util.SolrPluginUtils;
public abstract class SolrHighlighter public abstract class SolrHighlighter
{ {
public static Logger log = LoggerFactory.getLogger(SolrHighlighter.class); public static Logger log = LoggerFactory.getLogger(SolrHighlighter.class);
// Thread safe registry // Thread safe registry
protected final Map<String,SolrFormatter> formatters = protected final Map<String,SolrFormatter> formatters =
Collections.synchronizedMap( new HashMap<String, SolrFormatter>() ); Collections.synchronizedMap( new HashMap<String, SolrFormatter>() );
// Thread safe registry // Thread safe registry
protected final Map<String,SolrFragmenter> fragmenters = protected final Map<String,SolrFragmenter> fragmenters =
Collections.synchronizedMap( new HashMap<String, SolrFragmenter>() ); Collections.synchronizedMap( new HashMap<String, SolrFragmenter>() );
public abstract void initalize( final Config config ); public abstract void initalize( final Config config );
/** /**
* Check whether Highlighting is enabled for this request. * Check whether Highlighting is enabled for this request.
* @param params The params controlling Highlighting * @param params The params controlling Highlighting
* @return <code>true</code> if highlighting enabled, <code>false</code> if not. * @return <code>true</code> if highlighting enabled, <code>false</code> if not.
*/ */
public boolean isHighlightingEnabled(SolrParams params) { public boolean isHighlightingEnabled(SolrParams params) {
return params.getBool(HighlightParams.HIGHLIGHT, false); return params.getBool(HighlightParams.HIGHLIGHT, false);
} }
/** /**
* Return a String array of the fields to be highlighted. * Return a String array of the fields to be highlighted.
* Falls back to the programatic defaults, or the default search field if the list of fields * Falls back to the programatic defaults, or the default search field if the list of fields
* is not specified in either the handler configuration or the request. * is not specified in either the handler configuration or the request.
* @param query The current Query * @param query The current Query
* @param request The current SolrQueryRequest * @param request The current SolrQueryRequest
* @param defaultFields Programmatic default highlight fields, used if nothing is specified in the handler config or the request. * @param defaultFields Programmatic default highlight fields, used if nothing is specified in the handler config or the request.
*/ */
public String[] getHighlightFields(Query query, SolrQueryRequest request, String[] defaultFields) { public String[] getHighlightFields(Query query, SolrQueryRequest request, String[] defaultFields) {
String fields[] = request.getParams().getParams(HighlightParams.FIELDS); String fields[] = request.getParams().getParams(HighlightParams.FIELDS);
// if no fields specified in the request, or the handler, fall back to programmatic default, or default search field. // if no fields specified in the request, or the handler, fall back to programmatic default, or default search field.
if(emptyArray(fields)) { if(emptyArray(fields)) {
// use default search field if highlight fieldlist not specified. // use default search field if highlight fieldlist not specified.
if (emptyArray(defaultFields)) { if (emptyArray(defaultFields)) {
String defaultSearchField = request.getSchema().getSolrQueryParser(null).getField(); String defaultSearchField = request.getSchema().getSolrQueryParser(null).getField();
fields = null == defaultSearchField ? new String[]{} : new String[]{defaultSearchField}; fields = null == defaultSearchField ? new String[]{} : new String[]{defaultSearchField};
} }
else { else {
fields = defaultFields; fields = defaultFields;
} }
} }
else if (fields.length == 1) { else if (fields.length == 1) {
// if there's a single request/handler value, it may be a space/comma separated list if (fields[0].contains("*")) {
fields = SolrPluginUtils.split(fields[0]); // create a Java regular expression from the wildcard string
} String fieldRegex = fields[0].replaceAll("\\*", ".*");
Collection<String> storedHighlightFieldNames = request.getSearcher().getStoredHighlightFieldNames();
List<String> storedFieldsToHighlight = new ArrayList<String>();
for (String storedFieldName: storedHighlightFieldNames) {
if (storedFieldName.matches(fieldRegex)) {
storedFieldsToHighlight.add(storedFieldName);
}
}
fields = storedFieldsToHighlight.toArray(new String[] {});
} else {
// if there's a single request/handler value, it may be a space/comma separated list
fields = SolrPluginUtils.split(fields[0]);
}
}
return fields; return fields;
} }
protected boolean emptyArray(String[] arr) { protected boolean emptyArray(String[] arr) {
return (arr == null || arr.length == 0 || arr[0] == null || arr[0].trim().length() == 0); return (arr == null || arr.length == 0 || arr[0] == null || arr[0].trim().length() == 0);
} }
/** /**
* Generates a list of Highlighted query fragments for each item in a list * Generates a list of Highlighted query fragments for each item in a list
* of documents, or returns null if highlighting is disabled. * of documents, or returns null if highlighting is disabled.
* *
* @param docs query results * @param docs query results
* @param query the query * @param query the query
* @param req the current request * @param req the current request
* @param defaultFields default list of fields to summarize * @param defaultFields default list of fields to summarize
* *
* @return NamedList containing a NamedList for each document, which in * @return NamedList containing a NamedList for each document, which in
* turns contains sets (field, summary) pairs. * turns contains sets (field, summary) pairs.
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public abstract NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException; public abstract NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException;
} }

View File

@ -31,6 +31,7 @@ import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoMBean; import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.request.UnInvertedField; import org.apache.solr.request.UnInvertedField;
import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.OpenBitSet;
@ -92,6 +93,9 @@ public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {
// list of all caches associated with this searcher. // list of all caches associated with this searcher.
private final SolrCache[] cacheList; private final SolrCache[] cacheList;
private static final SolrCache[] noCaches = new SolrCache[0]; private static final SolrCache[] noCaches = new SolrCache[0];
private final Collection<String> fieldNames;
private Collection<String> storedHighlightFieldNames;
/** Creates a searcher searching the index in the named directory. /** Creates a searcher searching the index in the named directory.
* *
@ -179,6 +183,8 @@ public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {
// for DocSets // for DocSets
HASHSET_INVERSE_LOAD_FACTOR = solrConfig.hashSetInverseLoadFactor; HASHSET_INVERSE_LOAD_FACTOR = solrConfig.hashSetInverseLoadFactor;
HASHDOCSET_MAXSIZE = solrConfig.hashDocSetMaxSize; HASHDOCSET_MAXSIZE = solrConfig.hashDocSetMaxSize;
fieldNames = r.getFieldNames(IndexReader.FieldOption.ALL);
} }
@ -233,6 +239,36 @@ public class SolrIndexSearcher extends Searcher implements SolrInfoMBean {
public IndexReader getReader() { return reader; } public IndexReader getReader() { return reader; }
/** Direct access to the IndexSchema for use with this searcher */ /** Direct access to the IndexSchema for use with this searcher */
public IndexSchema getSchema() { return schema; } public IndexSchema getSchema() { return schema; }
/**
 * Gives access to the names of every field the index reader knows about.
 *
 * @return the collection held by this searcher; no copy is made
 */
public Collection<String> getFieldNames() {
  return this.fieldNames;
}
/**
 * Returns a collection of the names of all stored fields which can be
 * highlighted the index reader knows about.
 * <p>
 * A field qualifies when the schema reports it as stored and its type is a
 * TextField or a StrField. Index fields for which the schema lookup throws
 * are logged and skipped. The collection is computed on the first call and
 * the same instance is returned afterwards.
 */
public Collection<String> getStoredHighlightFieldNames() {
  // Guard the lazy initialisation: the list is built in a local and only
  // published once complete, so no caller can observe a half-filled list.
  synchronized (this) {
    if (storedHighlightFieldNames == null) {
      Collection<String> highlightable = new LinkedList<String>();
      for (String fieldName : fieldNames) {
        try {
          SchemaField field = schema.getField(fieldName);
          if (field.stored() &&
              ((field.getType() instanceof org.apache.solr.schema.TextField) ||
               (field.getType() instanceof org.apache.solr.schema.StrField))) {
            highlightable.add(fieldName);
          }
        } catch (RuntimeException e) { // getField() throws a SolrException, but it arrives as a RuntimeException
          log.warn("Field \"" + fieldName + "\" found in index, but not defined in schema.");
        }
      }
      storedHighlightFieldNames = highlightable;
    }
    return storedHighlightFieldNames;
  }
}
// //
// Set default regenerators on filter and query caches if they don't have any // Set default regenerators on filter and query caches if they don't have any
// //

View File

@ -22,12 +22,15 @@ import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.util.*; import org.apache.solr.util.*;
import org.apache.solr.common.params.HighlightParams; import org.apache.solr.common.params.HighlightParams;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader; import java.io.StringReader;
import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.List;
/** /**
* Tests some basic functionality of Solr while demonstrating good * Tests some basic functionality of Solr while demonstrating good
@ -630,4 +633,43 @@ public class HighlighterTest extends AbstractSolrTestCase {
oldHighlight1, oldHighlight2, oldHighlight3 oldHighlight1, oldHighlight2, oldHighlight3
); );
} }
/**
 * Checks glob expansion of hl.fl: only stored text/string fields that
 * match the glob may be returned by getHighlightFields.
 */
public void testGetHighlightFields() {
  HashMap<String, String> args = new HashMap<String, String>();
  args.put("fl", "id score");
  args.put("hl", "true");
  args.put("hl.fl", "t*");

  assertU(adoc("id", "0", "title", "test", // static stored
      "text", "test", // static not stored
      "foo_s", "test", // dynamic stored
      "foo_sI", "test", // dynamic not stored
      "weight", "1.0")); // stored but not text
  assertU(commit());
  assertU(optimize());

  TestHarness.LocalRequestFactory lrf = h.getRequestFactory("standard", 0,
      10, args);

  SolrQueryRequest request = lrf.makeRequest("test");
  try {
    SolrHighlighter highlighter = request.getCore().getHighlighter();
    List<String> highlightFieldNames = Arrays.asList(highlighter
        .getHighlightFields(null, request, new String[] {}));
    assertTrue("Expected to highlight on field \"title\"", highlightFieldNames
        .contains("title"));
    assertFalse("Expected to not highlight on field \"text\"",
        highlightFieldNames.contains("text"));
    assertFalse("Expected to not highlight on field \"weight\"",
        highlightFieldNames.contains("weight"));
  } finally {
    // release whatever the request acquired (e.g. its searcher) even if an assertion fails
    request.close();
  }

  args.put("hl.fl", "foo_*");
  lrf = h.getRequestFactory("standard", 0, 10, args);
  request = lrf.makeRequest("test");
  try {
    SolrHighlighter highlighter = request.getCore().getHighlighter();
    List<String> highlightFieldNames = Arrays.asList(highlighter
        .getHighlightFields(null, request, new String[] {}));
    assertEquals("Expected one field to highlight on", 1, highlightFieldNames
        .size());
    assertEquals("Expected to highlight on field \"foo_s\"", "foo_s",
        highlightFieldNames.get(0));
  } finally {
    request.close();
  }
}
} }