SOLR-1624: Highlighter bug w/ term positions stored and multi-valued field

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@888096 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2009-12-07 19:24:15 +00:00
parent ac47aa00b2
commit 037fdbcd42
3 changed files with 48 additions and 16 deletions

View File

@ -110,6 +110,10 @@ Bug Fixes
* SOLR-1628: log contains incorrect number of adds and deletes.
(Thijs Vonk via yonik)
* SOLR-1624: Highlighter only highlights values from the first field value
in a multivalued field when term positions (term vectors) are stored.
(Chris Harris via yonik)
Other Changes
----------------------

View File

@ -284,23 +284,24 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
String[] summaries = null;
List<TextFragment> frags = new ArrayList<TextFragment>();
TermOffsetsTokenStream tots = null;
for (int j = 0; j < docTexts.length; j++) {
// create TokenStream
try {
// attempt term vectors
if( tots == null ) {
TokenStream tvStream = TokenSources.getTokenStream(searcher.getReader(), docId, fieldName);
if (tvStream != null) {
tots = new TermOffsetsTokenStream(tvStream);
tstream = tots.getMultiValuedTokenStream( docTexts[j].length() );
} else {
// fall back to analyzer
tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
}
TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization
try {
TokenStream tvStream = TokenSources.getTokenStream(searcher.getReader(), docId, fieldName);
if (tvStream != null) {
tots = new TermOffsetsTokenStream(tvStream);
}
}
catch (IllegalArgumentException e) {
}
catch (IllegalArgumentException e) {
// No problem. But we can't use TermOffsets optimization.
}
for (int j = 0; j < docTexts.length; j++) {
if( tots != null ) {
// if we're using TermOffsets optimization, then get the next
// field value's TokenStream (i.e. get field j's TokenStream) from tots:
tstream = tots.getMultiValuedTokenStream( docTexts[j].length() );
} else {
// fall back to analyzer
tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
}

View File

@ -190,6 +190,33 @@ public class HighlighterTest extends AbstractSolrTestCase {
);
}
/**
 * Companion to testTermVecMultiValuedHighlight. Indexes a document whose
 * tv_mv_text field carries two values -- "short" first, LONG_TEXT second --
 * and queries for "long", so the expected snippets cannot come from the
 * first value alone.
 */
public void testTermVecMultiValuedHighlight2() throws Exception {
  // highlighting parameters: highlight tv_mv_text, up to two snippets
  HashMap<String,String> hlParams = new HashMap<String,String>();
  hlParams.put("hl", "true");
  hlParams.put("hl.fl", "tv_mv_text");
  hlParams.put("hl.snippets", "2");
  TestHarness.LocalRequestFactory requestFactory =
      h.getRequestFactory("standard", 0, 200, hlParams);

  // one document, two values for the same field; the first is the short one
  assertU(adoc("tv_mv_text", "short",
               "tv_mv_text", LONG_TEXT,
               "id", "1"));
  assertU(commit());
  assertU(optimize());

  // expect a highlighting entry for doc 1 and both <em>long</em> snippets
  assertQ("Basic summarization",
          requestFactory.makeRequest("tv_mv_text:long"),
          "//lst[@name='highlighting']/lst[@name='1']",
          "//lst[@name='1']/arr[@name='tv_mv_text']/str[.='a <em>long</em> days night this should be a piece of text which']",
          "//arr[@name='tv_mv_text']/str[.=' <em>long</em> fragments.']"
          );
}
public void testDisMaxHighlight() {
// same test run through dismax handler