mirror of https://github.com/apache/lucene.git
SOLR-925: Fixed highlighting on fields with multiValued="true" and termOffsets="true"
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@729450 13f79535-47bb-0310-9956-ffa450edef68
parent 8563645b80
commit 3b57a5a624
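SOLR-925 in brief: with multiValued="true" and termOffsets="true", the highlighter pulled a single token stream from the field's term vector, whose offsets are absolute across the concatenation of all stored values, and then applied it to each value individually, so token offsets and value boundaries disagreed. The fix introduces a TermOffsetsTokenStream wrapper that slices that one stream into per-value sub-streams and rebases each token's offsets to its own value (see the standalone sketch after the new class below).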
CHANGES.txt
@@ -118,7 +118,7 @@ New Features
     optimized distributed faceting refinement by lowering parsing overhead and
     by making requests and responses smaller.
 
-25. SOLR-876: WOrdDelimiterFilter now supports a splitOnNumerics
+25. SOLR-876: WordDelimiterFilter now supports a splitOnNumerics
     option, as well as a list of protected terms.
     (Dan Rosher via hossman)
@@ -200,6 +200,8 @@ Bug Fixes
 
 22. SOLR-897: Fixed Argument list too long error when there are lots of snapshots/backups (Dan Rosher via billa)
 
+23. SOLR-925: Fixed highlighting on fields with multiValued="true" and termOffsets="true" (koji)
+
 Other Changes
 ----------------------
 
DefaultSolrHighlighter.java
@@ -271,11 +271,14 @@ public class DefaultSolrHighlighter extends SolrHighlighter
 
     String[] summaries = null;
     List<TextFragment> frags = new ArrayList<TextFragment>();
+    TermOffsetsTokenStream tots = null;
     for (int j = 0; j < docTexts.length; j++) {
       // create TokenStream
       try {
         // attempt term vectors
-        tstream = TokenSources.getTokenStream(searcher.getReader(), docId, fieldName);
+        if( tots == null )
+          tots = new TermOffsetsTokenStream( TokenSources.getTokenStream(searcher.getReader(), docId, fieldName) );
+        tstream = tots.getMultiValuedTokenStream( docTexts[j].length() );
       }
       catch (IllegalArgumentException e) {
         // fall back to anaylzer
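Note the shape of the fix here: the TermOffsetsTokenStream is created lazily on the first value and then reused for every subsequent value of the same document, and each iteration asks it for a window exactly docTexts[j].length() characters wide, so the per-value token streams stay aligned with the stored values being highlighted.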
@@ -410,3 +413,44 @@ class TokenOrderingFilter extends TokenFilter {
     return queue.isEmpty() ? null : queue.removeFirst();
   }
 }
+
+class TermOffsetsTokenStream {
+
+  TokenStream bufferedTokenStream = null;
+  Token bufferedToken;
+  int startOffset;
+  int endOffset;
+
+  public TermOffsetsTokenStream( TokenStream tstream ){
+    bufferedTokenStream = tstream;
+    startOffset = 0;
+    bufferedToken = null;
+  }
+
+  public TokenStream getMultiValuedTokenStream( final int length ){
+    endOffset = startOffset + length;
+    return new TokenStream(){
+      Token token;
+      public Token next() throws IOException {
+        while( true ){
+          if( bufferedToken == null )
+            bufferedToken = bufferedTokenStream.next();
+          if( bufferedToken == null ) return null;
+          if( startOffset <= bufferedToken.startOffset() &&
+              bufferedToken.endOffset() <= endOffset ){
+            token = bufferedToken;
+            bufferedToken = null;
+            token.setStartOffset( token.startOffset() - startOffset );
+            token.setEndOffset( token.endOffset() - startOffset );
+            return token;
+          }
+          else if( bufferedToken.endOffset() > endOffset ){
+            startOffset += length + 1;
+            return null;
+          }
+          bufferedToken = null;
+        }
+      }
+    };
+  }
+}
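To see the windowing logic concretely, here is a small self-contained sketch of the same algorithm (illustrative only: OffsetWindowDemo and Tok are stand-ins invented for this example, not Lucene API). Tokens whose absolute offsets fit inside the current value's window are emitted with offsets rebased to that value; the first token that overruns the window ends that sub-stream, and the window then advances by length + 1, i.e. the class assumes a one-character offset gap between consecutive values.

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.List;

public class OffsetWindowDemo {

  // Stand-in for org.apache.lucene.analysis.Token: text plus absolute offsets.
  record Tok(String text, int start, int end) {}

  public static void main(String[] args) {
    // Term-vector tokens over the concatenation "a b c d" + separator + "e f g h",
    // with offsets absolute across the whole concatenation.
    Deque<Tok> buffered = new ArrayDeque<>(List.of(
        new Tok("a", 0, 1), new Tok("b", 2, 3), new Tok("c", 4, 5),
        new Tok("d", 6, 7), new Tok("e", 8, 9), new Tok("f", 10, 11),
        new Tok("g", 12, 13), new Tok("h", 14, 15)));

    int windowStart = 0;
    for (String value : new String[] { "a b c d", "e f g h" }) {
      int windowEnd = windowStart + value.length();
      System.out.println("value: \"" + value + "\"");
      // consume every buffered token that ends inside this value's window
      while (!buffered.isEmpty() && buffered.peekFirst().end() <= windowEnd) {
        Tok t = buffered.removeFirst();
        // rebase offsets so they are relative to this value's own text
        System.out.printf("  %s [%d,%d)%n",
            t.text(), t.start() - windowStart, t.end() - windowStart);
      }
      windowStart = windowEnd + 1; // +1 mirrors startOffset += length + 1
    }
  }
}

Running this, the second value's tokens e, f, g, h come out at [0,1), [2,3), [4,5), [6,7), exactly what a fresh analysis of "e f g h" would produce, which is what testTermOffsetsTokenStream below asserts.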
HighlighterTest.java
@@ -17,10 +17,16 @@
 
 package org.apache.solr.highlight;
 
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.util.*;
 import org.apache.solr.common.params.HighlightParams;
 
+import java.io.IOException;
+import java.io.StringReader;
 import java.util.HashMap;
 
 /**
@@ -141,6 +147,47 @@ public class HighlighterTest extends AbstractSolrTestCase
             );
   }
 
+  public void testTermOffsetsTokenStream() throws Exception {
+    String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
+    Analyzer a1 = new WhitespaceAnalyzer();
+    TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
+        a1.tokenStream( "", new StringReader( "a b c d e f g h i j k l m n" ) ) );
+    for( String v : multivalued ){
+      TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
+      Analyzer a2 = new WhitespaceAnalyzer();
+      TokenStream ts2 = a2.tokenStream( "", new StringReader( v ) );
+      Token t1 = new Token();
+      Token t2 = new Token();
+      for( t1 = ts1.next( t1 ); t1 != null; t1 = ts1.next( t1 ) ){
+        t2 = ts2.next( t2 );
+        assertEquals( t2, t1 );
+      }
+    }
+  }
+
+  public void testTermVecMultiValuedHighlight() throws Exception {
+
+    // do summarization using term vectors on multivalued field
+    HashMap<String,String> args = new HashMap<String,String>();
+    args.put("hl", "true");
+    args.put("hl.fl", "tv_mv_text");
+    args.put("hl.snippets", "2");
+    TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
+      "standard",0,200,args);
+
+    assertU(adoc("tv_mv_text", LONG_TEXT,
+                 "tv_mv_text", LONG_TEXT,
+                 "id", "1"));
+    assertU(commit());
+    assertU(optimize());
+    assertQ("Basic summarization",
+            sumLRF.makeRequest("tv_mv_text:long"),
+            "//lst[@name='highlighting']/lst[@name='1']",
+            "//lst[@name='1']/arr[@name='tv_mv_text']/str[.='a <em>long</em> days night this should be a piece of text which']",
+            "//arr[@name='tv_mv_text']/str[.=' <em>long</em> fragments.']"
+            );
+  }
+
 public void testDisMaxHighlight() {
 
   // same test run through dismax handler
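The two new tests divide the work: testTermOffsetsTokenStream checks the wrapper in isolation, asserting that slicing one whitespace-analyzed stream of "a b c d e f g h i j k l m n" per value yields exactly the tokens a per-value analysis would produce, while testTermVecMultiValuedHighlight exercises the whole path, indexing LONG_TEXT twice into the multivalued tv_mv_text field and asking for two snippets, roughly what a request like select?q=tv_mv_text:long&hl=true&hl.fl=tv_mv_text&hl.snippets=2 would do (URL shape illustrative).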
schema.xml
@@ -455,6 +455,8 @@
    <dynamicField name="t_*" type="text" indexed="true" stored="true"/>
    <dynamicField name="tv_*" type="text" indexed="true" stored="true"
      termVectors="true" termPositions="true" termOffsets="true"/>
+   <dynamicField name="tv_mv_*" type="text" indexed="true" stored="true" multivalued="true"
+     termVectors="true" termPositions="true" termOffsets="true"/>
 
    <!-- special fields for dynamic copyField test -->
    <dynamicField name="dynamic_*" type="string" indexed="true" stored="true"/>
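The new tv_mv_* dynamic field mirrors tv_* but adds multivalued="true", giving the test schema a multivalued field that stores term vectors with positions and offsets, which is precisely the configuration SOLR-925 fixes.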