mirror of https://github.com/apache/lucene.git
SOLR-1624: Highlighter bug w/ term positions stored and multi valued field
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@888096 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ac47aa00b2
commit
037fdbcd42
|
@ -110,6 +110,10 @@ Bug Fixes
|
|||
* SOLR-1628: log contains incorrect number of adds and deletes.
|
||||
(Thijs Vonk via yonik)
|
||||
|
||||
* SOLR-1624: Highlighter only highlights values from the first field value
|
||||
in a multivalued field when term positions (term vectors) are stored.
|
||||
(Chris Harris via yonik)
|
||||
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
|
|
@ -284,23 +284,24 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
|
||||
String[] summaries = null;
|
||||
List<TextFragment> frags = new ArrayList<TextFragment>();
|
||||
TermOffsetsTokenStream tots = null;
|
||||
for (int j = 0; j < docTexts.length; j++) {
|
||||
// create TokenStream
|
||||
try {
|
||||
// attempt term vectors
|
||||
if( tots == null ) {
|
||||
TokenStream tvStream = TokenSources.getTokenStream(searcher.getReader(), docId, fieldName);
|
||||
if (tvStream != null) {
|
||||
tots = new TermOffsetsTokenStream(tvStream);
|
||||
tstream = tots.getMultiValuedTokenStream( docTexts[j].length() );
|
||||
} else {
|
||||
// fall back to analyzer
|
||||
tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
|
||||
}
|
||||
|
||||
TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization
|
||||
try {
|
||||
TokenStream tvStream = TokenSources.getTokenStream(searcher.getReader(), docId, fieldName);
|
||||
if (tvStream != null) {
|
||||
tots = new TermOffsetsTokenStream(tvStream);
|
||||
}
|
||||
}
|
||||
catch (IllegalArgumentException e) {
|
||||
}
|
||||
catch (IllegalArgumentException e) {
|
||||
// No problem. But we can't use TermOffsets optimization.
|
||||
}
|
||||
|
||||
for (int j = 0; j < docTexts.length; j++) {
|
||||
if( tots != null ) {
|
||||
// if we're using TermOffsets optimization, then get the next
|
||||
// field value's TokenStream (i.e. get field j's TokenStream) from tots:
|
||||
tstream = tots.getMultiValuedTokenStream( docTexts[j].length() );
|
||||
} else {
|
||||
// fall back to analyzer
|
||||
tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
|
||||
}
|
||||
|
|
|
@ -190,6 +190,33 @@ public class HighlighterTest extends AbstractSolrTestCase {
|
|||
);
|
||||
}
|
||||
|
||||
// Variant of testTermVecMultiValuedHighlight to make sure that
|
||||
// more than just the first value of a multi-valued field is
|
||||
// considered for highlighting.
|
||||
public void testTermVecMultiValuedHighlight2() throws Exception {
|
||||

||||
// do summarization using term vectors on multivalued field
|
||||
HashMap<String,String> args = new HashMap<String,String>();
|
||||
args.put("hl", "true");
|
||||
args.put("hl.fl", "tv_mv_text");
|
||||
// ask for up to two snippets; the assertions below expect two distinct fragments
args.put("hl.snippets", "2");
|
||||
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
|
||||
"standard",0,200,args);
|
||||

||||
// The first value ("short") does not contain the query term "long", so a
// highlighted match cannot come from the first value of the field.
// NOTE(review): LONG_TEXT is defined elsewhere in this test class; presumably
// it contains the fragments asserted below -- confirm against its definition.
String shortText = "short";
|
||||
assertU(adoc("tv_mv_text", shortText,
|
||||
"tv_mv_text", LONG_TEXT,
|
||||
"id", "1"));
|
||||
assertU(commit());
|
||||
assertU(optimize());
|
||||
// query for "long" and require a highlighting entry for doc id 1 with both expected fragments
assertQ("Basic summarization",
|
||||
sumLRF.makeRequest("tv_mv_text:long"),
|
||||
"//lst[@name='highlighting']/lst[@name='1']",
|
||||
"//lst[@name='1']/arr[@name='tv_mv_text']/str[.='a <em>long</em> days night this should be a piece of text which']",
|
||||
"//arr[@name='tv_mv_text']/str[.=' <em>long</em> fragments.']"
|
||||
);
|
||||
}
|
||||
|
||||
public void testDisMaxHighlight() {
|
||||
|
||||
// same test run through dismax handler
|
||||
|
|
Loading…
Reference in New Issue