mirror of https://github.com/apache/lucene.git
SOLR-452 commit: hl.mergeContiguous
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@610191 13f79535-47bb-0310-9956-ffa450edef68
parent a405feac80
commit 06b3f0eed0
CHANGES.txt
@@ -87,9 +87,11 @@ New Features
 13. SOLR-225: Enable pluggable highlighting classes. Allow configurable
     highlighting formatters and Fragmenters. (ryan)
 
-14. SOLR-273/376: Added hl.maxAnalyzedChars highlighting parameter, defaulting to
-    50k. Also add hl.alternateField, which allows the specification of a backup
-    field to use as summary if no keywords are matched. (klaas)
+14. SOLR-273/376/452: Added hl.maxAnalyzedChars highlighting parameter, defaulting
+    to 50k, hl.alternateField, which allows the specification of a backup
+    field to use as summary if no keywords are matched, and hl.mergeContiguous,
+    which combines fragments if they are adjacent in the source document.
+    (klaas, Grant Ingersoll via klaas)
 
 15. SOLR-291: Control maximum number of documents to cache for any entry
     in the queryResultCache via queryResultMaxDocsCached solrconfig.xml
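(Illustration, not part of the commit: the parameters named in the entry above combine on an ordinary select request. The field name t_text is simply the one the new test uses, and the host/port are the stock example values.)

    http://localhost:8983/solr/select?q=t_text:long&hl=true&hl.fl=t_text&hl.snippets=4&hl.fragsize=40&hl.mergeContiguous=true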
HighlightParams.java
@@ -33,6 +33,7 @@ public interface HighlightParams {
   public static final String FIELD_MATCH = HIGHLIGHT+".requireFieldMatch";
   public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField";
 
+  public static final String MERGE_CONTIGUOUS_FRAGMENTS = HIGHLIGHT + ".mergeContiguous";
   // Formatter
   public static final String SIMPLE = "simple";
   public static final String SIMPLE_PRE = HIGHLIGHT+"."+SIMPLE+".pre";
SolrHighlighter.java
@@ -187,6 +187,15 @@ public class SolrHighlighter
   protected int getMaxSnippets(String fieldName, SolrParams params) {
     return params.getFieldInt(fieldName, HighlightParams.SNIPPETS,1);
   }
 
+  /**
+   * Return whether adjacent fragments should be merged.
+   * @param fieldName The name of the field
+   * @param params The params controlling Highlighting
+   */
+  protected boolean isMergeContiguousFragments(String fieldName, SolrParams params){
+    return params.getFieldBool(fieldName, HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, false);
+  }
+
   /**
    * Return a formatter appropriate for this field. If a formatter
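(Side note, inferred from the use of getFieldBool rather than stated in the commit: like the other per-field highlighting parameters, the flag can be set for a single field with the f.<field>.<param> form, e.g. f.t_text.hl.mergeContiguous=true; the test added below uses that spelling as well as the global hl.mergeContiguous.)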
@@ -260,20 +269,22 @@ public class SolrHighlighter
       searcher.readDocs(readDocs, docs, fset);
     }
 
-    // Highlight each document
-    DocIterator iterator = docs.iterator();
-    for (int i = 0; i < docs.size(); i++) {
-      int docId = iterator.nextDoc();
-      Document doc = readDocs[i];
-      NamedList docSummaries = new SimpleOrderedMap();
-      for (String fieldName : fieldNames) {
-        fieldName = fieldName.trim();
-        String[] docTexts = doc.getValues(fieldName);
-        if (docTexts == null) continue;
-
-        // get highlighter, and number of fragments for this field
-        Highlighter highlighter = getHighlighter(query, fieldName, req);
-        int numFragments = getMaxSnippets(fieldName, params);
+    // Highlight each document
+    DocIterator iterator = docs.iterator();
+    for (int i = 0; i < docs.size(); i++) {
+      int docId = iterator.nextDoc();
+      Document doc = readDocs[i];
+      NamedList docSummaries = new SimpleOrderedMap();
+      for (String fieldName : fieldNames) {
+        fieldName = fieldName.trim();
+        String[] docTexts = doc.getValues(fieldName);
+        if (docTexts == null) continue;
+
+        // get highlighter, and number of fragments for this field
+        Highlighter highlighter = getHighlighter(query, fieldName, req);
+        int numFragments = getMaxSnippets(fieldName, params);
+        boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);
 
         String[] summaries = null;
         TextFragment[] frag;
@@ -288,7 +299,7 @@ public class SolrHighlighter
           // fall back to analyzer
           tstream = new TokenOrderingFilter(schema.getAnalyzer().tokenStream(fieldName, new StringReader(docTexts[0])), 10);
         }
-        frag = highlighter.getBestTextFragments(tstream, docTexts[0], false, numFragments);
+        frag = highlighter.getBestTextFragments(tstream, docTexts[0], mergeContiguousFragments, numFragments);
       }
       else {
         // multi-valued field
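(Standalone sketch, not from the commit: the call above delegates merging to Lucene's contrib highlighter. The class below shows the same getBestTextFragments flag used directly against the Lucene 2.x-era API; constructor and tokenStream signatures changed in later Lucene releases, and the field/term names are only illustrative.)

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;

public class MergeContiguousSketch {
  public static void main(String[] args) throws Exception {
    String text = "this is some long text. It has the word long in many places. "
        + "In fact, it has long on some different fragments. "
        + "Let us see what happens to long in this case.";

    // Highlight matches of "long", wrapping them in <em> tags and cutting
    // the source into ~40-character fragments so several snippets exist.
    Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter("<em>", "</em>"),
        new QueryScorer(new TermQuery(new Term("t_text", "long"))));
    highlighter.setTextFragmenter(new SimpleFragmenter(40));

    TokenStream tokens =
        new WhitespaceAnalyzer().tokenStream("t_text", new StringReader(text));

    // The third argument is mergeContiguousFragments: with false, up to four
    // separate snippets come back; with true, snippets that sit next to each
    // other in the source text are concatenated into one fragment, which is
    // what hl.mergeContiguous=true now asks SolrHighlighter to request.
    TextFragment[] frags = highlighter.getBestTextFragments(tokens, text, true, 4);
    for (TextFragment frag : frags) {
      if (frag != null && frag.getScore() > 0) {
        System.out.println(frag.toString());
      }
    }
  }
}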
HighlighterTest.java
@@ -19,6 +19,8 @@ package org.apache.solr.highlight;
 
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.util.*;
+import org.apache.solr.common.params.HighlightParams;
+
 import java.util.HashMap;
 
 /**
@@ -27,7 +29,11 @@ import java.util.HashMap;
  */
 public class HighlighterTest extends AbstractSolrTestCase {
 
-  private static String LONG_TEXT = "a long days night this should be a piece of text which is is is is is is is is is is is is is is is is is is is is is is is is isis is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is sufficiently lengthly to produce multiple fragments which are not concatenated at all--we want two disjoint long fragments.";
+  private static String LONG_TEXT = "a long days night this should be a piece of text which is is is is is is is is is is is is is is is is is is is " +
+          "is is is is is isis is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is is " +
+          "is is is is is is is is is is is is is " +
+          "is is is is is is is is is is is is is is is is is is is is sufficiently lengthly to produce multiple fragments which are not concatenated " +
+          "at all--we want two disjoint long fragments.";
 
   @Override public String getSchemaFile() { return "schema.xml"; }
   @Override public String getSolrConfigFile() { return "solrconfig.xml"; }
@@ -67,6 +73,52 @@ public class HighlighterTest extends AbstractSolrTestCase {
     assertTrue( regex instanceof RegexFragmenter );
   }
 
+  public void testMergeContiguous() throws Exception {
+    HashMap<String,String> args = new HashMap<String,String>();
+    args.put(HighlightParams.HIGHLIGHT, "true");
+    args.put("df", "t_text");
+    args.put(HighlightParams.FIELDS, "");
+    args.put(HighlightParams.SNIPPETS, String.valueOf(4));
+    args.put(HighlightParams.FRAGSIZE, String.valueOf(40));
+    args.put(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "true");
+    TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
+        "standard", 0, 200, args);
+    String input = "this is some long text. It has the word long in many places. In fact, it has long on some different fragments. " +
+        "Let us see what happens to long in this case.";
+    String gold = "this is some <em>long</em> text. It has the word <em>long</em> in many places. In fact, it has <em>long</em> on some different fragments. " +
+        "Let us see what happens to <em>long</em> in this case.";
+    assertU(adoc("t_text", input, "id", "1"));
+    assertU(commit());
+    assertU(optimize());
+    assertQ("Merge Contiguous",
+            sumLRF.makeRequest("t_text:long"),
+            "//lst[@name='highlighting']/lst[@name='1']",
+            "//lst[@name='1']/arr[@name='t_text']/str[.='" + gold + "']"
+            );
+    args.put("f.t_text." + HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "true");
+    assertU(adoc("t_text", input, "id", "1"));
+    assertU(commit());
+    assertU(optimize());
+    assertQ("Merge Contiguous",
+            sumLRF.makeRequest("t_text:long"),
+            "//lst[@name='highlighting']/lst[@name='1']",
+            "//lst[@name='1']/arr[@name='t_text']/str[.='" + gold + "']"
+            );
+
+    args.put(HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "false");
+    args.put("f.t_text." + HighlightParams.MERGE_CONTIGUOUS_FRAGMENTS, "false");
+    sumLRF = h.getRequestFactory(
+        "standard", 0, 200, args);
+    assertQ("Merge Contiguous",
+            sumLRF.makeRequest("t_text:long"),
+            "//lst[@name='highlighting']/lst[@name='1']",
+            "//lst[@name='1']/arr[@name='t_text']/str[.='this is some <em>long</em> text. It has']",
+            "//lst[@name='1']/arr[@name='t_text']/str[.=' the word <em>long</em> in many places. In fact, it has']",
+            "//lst[@name='1']/arr[@name='t_text']/str[.=' <em>long</em> on some different fragments. Let us']",
+            "//lst[@name='1']/arr[@name='t_text']/str[.=' see what happens to <em>long</em> in this case.']"
+            );
+  }
+
   public void testTermVecHighlight() {
 
     // do summarization using term vectors