SOLR-452 commit: hl.mergeContiguous

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@610191 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mike Klaas 2008-01-08 22:02:46 +00:00
parent a405feac80
commit 06b3f0eed0
4 changed files with 84 additions and 18 deletions

View File

@ -87,9 +87,11 @@ New Features
13. SOLR-225: Enable pluggable highlighting classes. Allow configurable
highlighting formatters and Fragmenters. (ryan)
14. SOLR-273/376: Added hl.maxAnalyzedChars highlighting parameter, defaulting to
50k. Also add hl.alternateField, which allows the specification of a backup
field to use as summary if no keywords are matched. (klaas)
14. SOLR-273/376/452: Added three highlighting parameters: hl.maxAnalyzedChars,
defaulting to 50k; hl.alternateField, which allows the specification of a
backup field to use as summary if no keywords are matched; and
hl.mergeContiguous, which combines fragments if they are adjacent in the
source document. (klaas, Grant Ingersoll via klaas)
15. SOLR-291: Control maximum number of documents to cache for any entry
in the queryResultCache via queryResultMaxDocsCached solrconfig.xml

View File

@ -33,6 +33,7 @@ public interface HighlightParams {
public static final String FIELD_MATCH = HIGHLIGHT+".requireFieldMatch";
public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField";
public static final String MERGE_CONTIGUOUS_FRAGMENTS = HIGHLIGHT + ".mergeContiguous";
// Formatter
public static final String SIMPLE = "simple";
public static final String SIMPLE_PRE = HIGHLIGHT+"."+SIMPLE+".pre";

View File

@ -187,6 +187,15 @@ public class SolrHighlighter
protected int getMaxSnippets(String fieldName, SolrParams params) {
  // Field-scoped lookup of the snippets setting; 1 is passed as the default.
  final int maxSnippets = params.getFieldInt(fieldName, HighlightParams.SNIPPETS, 1);
  return maxSnippets;
}
/**
 * Tells the caller whether adjacent fragments should be merged for the
 * given field.
 *
 * @param fieldName The name of the field being highlighted
 * @param params The params controlling Highlighting
 * @return the boolean read from {@link HighlightParams#MERGE_CONTIGUOUS_FRAGMENTS}
 *         for this field; <code>false</code> is passed as the default
 */
protected boolean isMergeContiguousFragments(String fieldName, SolrParams params){
  final boolean mergeContiguous =
      params.getFieldBool(fieldName, HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, false);
  return mergeContiguous;
}
/**
* Return a formatter appropriate for this field. If a formatter
@ -260,20 +269,22 @@ public class SolrHighlighter
searcher.readDocs(readDocs, docs, fset);
}
// Highlight each document
DocIterator iterator = docs.iterator();
for (int i = 0; i < docs.size(); i++) {
int docId = iterator.nextDoc();
Document doc = readDocs[i];
NamedList docSummaries = new SimpleOrderedMap();
for (String fieldName : fieldNames) {
fieldName = fieldName.trim();
String[] docTexts = doc.getValues(fieldName);
if (docTexts == null) continue;
// get highlighter, and number of fragments for this field
Highlighter highlighter = getHighlighter(query, fieldName, req);
int numFragments = getMaxSnippets(fieldName, params);
// Highlight each document
DocIterator iterator = docs.iterator();
for (int i = 0; i < docs.size(); i++) {
int docId = iterator.nextDoc();
Document doc = readDocs[i];
NamedList docSummaries = new SimpleOrderedMap();
for (String fieldName : fieldNames) {
fieldName = fieldName.trim();
String[] docTexts = doc.getValues(fieldName);
if (docTexts == null) continue;
// get highlighter, and number of fragments for this field
Highlighter highlighter = getHighlighter(query, fieldName, req);
int numFragments = getMaxSnippets(fieldName, params);
boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);
String[] summaries = null;
TextFragment[] frag;
@ -288,7 +299,7 @@ public class SolrHighlighter
// fall back to analyzer
tstream = new TokenOrderingFilter(schema.getAnalyzer().tokenStream(fieldName, new StringReader(docTexts[0])), 10);
}
frag = highlighter.getBestTextFragments(tstream, docTexts[0], false, numFragments);
frag = highlighter.getBestTextFragments(tstream, docTexts[0], mergeContiguousFragments, numFragments);
}
else {
// multi-valued field

View File

@ -19,6 +19,8 @@ package org.apache.solr.highlight;
import org.apache.solr.core.SolrCore;
import org.apache.solr.util.*;
import org.apache.solr.common.params.HighlightParams;
import java.util.HashMap;
/**
@ -27,7 +29,11 @@ import java.util.HashMap;
*/
public class HighlighterTest extends AbstractSolrTestCase {
private static String LONG_TEXT = "a long days night this should be a piece of text which is is is is is is is is is is is is is is is is is is is is is is is is isis is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is sufficiently lengthly to produce multiple fragments which are not concatenated at all--we want two disjoint long fragments.";
private static String LONG_TEXT = "a long days night this should be a piece of text which is is is is is is is is is is is is is is is is is is is " +
"is is is is is isis is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is " +
"is is is is is is is is is is is is is " +
"is is is is is is is is is is is is is is is is is is is is sufficiently lengthly to produce multiple fragments which are not concatenated " +
"at all--we want two disjoint long fragments.";
@Override public String getSchemaFile() { return "schema.xml"; }
@Override public String getSolrConfigFile() { return "solrconfig.xml"; }
@ -67,6 +73,52 @@ public class HighlighterTest extends AbstractSolrTestCase {
assertTrue( regex instanceof RegexFragmenter );
}
/**
 * Checks highlighting output with the merge-contiguous option switched on
 * and off.
 *
 * With the option set to "true" the test expects a single highlighted string
 * covering the whole input; with it set to "false" it expects four separate
 * fragments.
 */
public void testMergeContiguous() throws Exception {
  HashMap<String,String> args = new HashMap<String,String>();
  args.put(HighlightParams.HIGHLIGHT, "true");
  args.put("df", "t_text");
  // Empty field list; presumably highlighting then uses the default field
  // ("df") set above -- confirm against the highlighter.
  args.put(HighlightParams.FIELDS, "");
  args.put(HighlightParams.SNIPPETS, String.valueOf(4));
  args.put(HighlightParams.FRAGSIZE, String.valueOf(40));
  args.put(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "true");
  TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
    "standard", 0, 200, args);

  String input = "this is some long text. It has the word long in many places. In fact, it has long on some different fragments. " +
    "Let us see what happens to long in this case.";
  // Expected output when fragments are merged: the whole input with every
  // occurrence of "long" wrapped in <em> tags.
  String gold = "this is some <em>long</em> text. It has the word <em>long</em> in many places. In fact, it has <em>long</em> on some different fragments. " +
    "Let us see what happens to <em>long</em> in this case.";

  // Scenario 1: option enabled through the global parameter.
  assertU(adoc("t_text", input, "id", "1"));
  assertU(commit());
  assertU(optimize());
  assertQ("Merge Contiguous",
    sumLRF.makeRequest("t_text:long"),
    "//lst[@name='highlighting']/lst[@name='1']",
    "//lst[@name='1']/arr[@name='t_text']/str[.='" + gold + "']"
  );

  // Scenario 2: option additionally enabled through the per-field parameter.
  args.put("f.t_text." + HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "true");
  // Rebuild the factory after mutating args, exactly as scenario 3 does.
  // NOTE(review): if getRequestFactory snapshots args, the per-field setting
  // would otherwise never reach the request -- confirm against TestHarness.
  sumLRF = h.getRequestFactory(
    "standard", 0, 200, args);
  assertU(adoc("t_text", input, "id", "1"));
  assertU(commit());
  assertU(optimize());
  assertQ("Merge Contiguous",
    sumLRF.makeRequest("t_text:long"),
    "//lst[@name='highlighting']/lst[@name='1']",
    "//lst[@name='1']/arr[@name='t_text']/str[.='" + gold + "']"
  );

  // Scenario 3: option disabled both globally and per field; the four
  // fragments are expected to come back as separate strings.
  args.put(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "false");
  args.put("f.t_text." + HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "false");
  sumLRF = h.getRequestFactory(
    "standard", 0, 200, args);
  assertQ("Merge Contiguous",
    sumLRF.makeRequest("t_text:long"),
    "//lst[@name='highlighting']/lst[@name='1']",
    "//lst[@name='1']/arr[@name='t_text']/str[.='this is some <em>long</em> text. It has']",
    "//lst[@name='1']/arr[@name='t_text']/str[.=' the word <em>long</em> in many places. In fact, it has']",
    "//lst[@name='1']/arr[@name='t_text']/str[.=' <em>long</em> on some different fragments. Let us']",
    "//lst[@name='1']/arr[@name='t_text']/str[.=' see what happens to <em>long</em> in this case.']"
  );
}
public void testTermVecHighlight() {
// do summarization using term vectors