SOLR-6692: hl.maxAnalyzedChars should apply cumulatively on a multi-valued field

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1673200 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
David Wayne Smiley 2015-04-13 14:08:07 +00:00
parent c6482c1488
commit f5071289e7
3 changed files with 121 additions and 92 deletions

View File

@ -128,7 +128,11 @@ Other Changes
* SOLR-7384: Fix spurious failures in FullSolrCloudDistribCmdsTest. (shalin)
* SOLR-6692: The default highlighter is much more extensible. (David Smiley)
* SOLR-6692: Default highlighter changes:
- hl.maxAnalyzedChars now applies cumulatively on a multi-valued field.
- fragment ranking on a multi-valued field should be more relevant.
- Much more extensible.
(David Smiley)
================== 5.1.0 ==================

View File

@ -81,7 +81,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
protected final SolrCore solrCore;
/** Will be invoked via reflection */
//Will be invoked via reflection
public DefaultSolrHighlighter(SolrCore solrCore) {
this.solrCore = solrCore;
}
@ -224,8 +224,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
boolean reqFieldMatch = request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false);
if (reqFieldMatch) {
return new QueryTermScorer(query, request.getSearcher().getIndexReader(), fieldName);
}
else {
} else {
return new QueryTermScorer(query);
}
}
@ -538,6 +537,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
}
highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
maxCharsToAnalyze -= thisText.length();
// Highlight!
try {
@ -594,6 +594,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
String value = thisField.stringValue();
result.add(value);
maxCharsToAnalyze -= value.length();//we exit early if we'll never get to analyze the value
maxValues--;
if (maxValues <= 0 || maxCharsToAnalyze <= 0) {
break;

View File

@ -515,33 +515,57 @@ public class HighlighterTest extends SolrTestCaseJ4 {
args.put("fl", "id score");
args.put("hl", "true");
args.put("hl.snippets", "10");
args.put("hl.fl", "t_text");
final String field = random().nextBoolean() ? "t_text" : "tv_text";
args.put("hl.fl", field);
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
"standard", 0, 200, args);
assertU(adoc("t_text", LONG_TEXT, "id", "1"));
assertU(adoc(field, LONG_TEXT, "id", "1"));
assertU(commit());
assertU(optimize());
assertQ("token at start of text",
sumLRF.makeRequest("t_text:disjoint"),
sumLRF.makeRequest(field + ":disjoint"),
"//lst[@name='highlighting']/lst[@name='1']",
"//lst[@name='1']/arr[count(str)=1]"
);
args.put("hl.maxAnalyzedChars", "20");
sumLRF = h.getRequestFactory("standard", 0, 200, args);
assertQ("token at end of text",
sumLRF.makeRequest("t_text:disjoint"),
sumLRF.makeRequest(field + ":disjoint"),
"//lst[@name='highlighting']/lst[@name='1']",
"//lst[@name='1'][not(*)]"
);
args.put("hl.maxAnalyzedChars", "-1");
sumLRF = h.getRequestFactory("standard", 0, 200, args);
assertQ("token at start of text",
sumLRF.makeRequest("t_text:disjoint"),
sumLRF.makeRequest(field + ":disjoint"),
"//lst[@name='highlighting']/lst[@name='1']",
"//lst[@name='1']/arr[count(str)=1]"
);
}
// Test multi-valued together with hl.maxAnalyzedChars
// Indexes one document whose field holds two values (a short text followed by LONG_TEXT),
// highlights it with a very small hl.maxAnalyzedChars, and checks that exactly one snippet
// comes back and that it is taken from the first (short) value.
@Test
public void testMultiValuedMaxAnalyzedChars() throws Exception {
String shortText = "some short blah blah blah blah";
// The field is picked at random on each run so that both named fields get exercised over time.
final String field = random().nextBoolean() ? "tv_mv_text" : "textgap"; // term vecs or not
// Same field name is supplied twice, so the document gets two values: shortText, then LONG_TEXT.
assertU(adoc(field, shortText,
field, LONG_TEXT,
"id", "1"));
assertU(commit());
// Up to 2 snippets are requested, but the analysis budget is only 8 characters, which is
// shorter than the first value alone.
assertQ(req("q", field + ":(short OR long)",
"indent", "on",
"hl", "true",
"hl.fl", field,
"hl.snippets", "2",
"hl.maxAnalyzedChars", "8"),
// Exactly one child element is expected under the highlighting array for doc 1.
"//lst[@name='highlighting']/lst[@name='1']/arr[count(*)=1]",
// That single snippet must be the highlighted start of the short value.
"//lst[@name='1']/arr/str[1][.='some <em>short</em>']"
// NOTE(review): the check below for a second snippet is disabled; presumably a match in the
// second value is not expected once the budget is spent on the first value — confirm.
//"//lst[@name='1']/arr/str[2][.='a <em>long</em> days']"
);
}
@Test