mirror of https://github.com/apache/lucene.git

LUCENE-2278: FastVectorHighlighter: highlighted term is out of alignment in multi-valued NOT_ANALYZED field

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@916090 13f79535-47bb-0310-9956-ffa450edef68

parent 4eef6adaaf
commit c831eb9c14

CHANGES.txt:
@@ -48,6 +48,9 @@ Bug fixes
   the previous code did not always implement the Snowball algorithm correctly.
   Additionally, for Version > 3.0, the Snowball stopword lists are used by
   default. (Robert Muir, Uwe Schindler, Simon Willnauer)
 
+* LUCENE-2278: FastVectorHighlighter: Highlighted term is out of alignment
+  in multi-valued NOT_ANALYZED field. (Koji Sekiguchi)
+
 API Changes
 
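Background for the entry above, with a rough illustration (not part of the commit; the class below is a standalone demo added for clarity): values of a multi-valued NOT_ANALYZED field are indexed with contiguous offsets (the offset gap is 0 when a value is not tokenized, 1 when it is), but the old fragments builder rebuilt the field text by joining the stored values with a space, so highlights after the first value landed one character off per preceding value.

// Illustrative only (not part of this commit): shows the offset drift for a
// multi-valued NOT_ANALYZED field using the test values from this change.
public class OffsetDriftDemo {
  public static void main(String[] args) {
    String[] values = { "abc", "defg", "hijkl" };

    // Offsets as the index records them for NOT_ANALYZED values: each value
    // starts where the previous one ended (offset gap 0), i.e.
    // "abc" -> [0,3), "defg" -> [3,7), "hijkl" -> [7,12).
    int start = 0;
    for (String value : values) {
      System.out.println("\"" + value + "\" indexed at [" + start + "," + (start + value.length()) + ")");
      start += value.length();
    }

    // The old builder rebuilt the source text by joining values with a space,
    // so "defg" sits at offset 4 in the buffer while the term vector says 3:
    // the highlight tags land one character off per preceding value.
    String joinedWithSpace = String.join(" ", values);
    System.out.println("indexOf(\"defg\") with separator:    " + joinedWithSpace.indexOf("defg")); // 4

    // Concatenating without a separator keeps buffer offsets equal to the
    // indexed offsets, which is what the fixed builder does for these values.
    String concatenated = String.join("", values);
    System.out.println("indexOf(\"defg\") without separator: " + concatenated.indexOf("defg"));    // 3
  }
}

Running it prints the indexed ranges plus indexOf("defg") = 4 for the space-joined buffer versus 3 for plain concatenation.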

BaseFragmentsBuilder.java:
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.MapFieldSelector;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
@@ -72,7 +73,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
     List<WeightedFragInfo> fragInfos = getWeightedFragInfoList( fieldFragList.fragInfos );
     
     List<String> fragments = new ArrayList<String>( maxNumFragments );
-    String[] values = getFieldValues( reader, docId, fieldName );
+    Field[] values = getFields( reader, docId, fieldName );
     if( values.length == 0 ) return null;
     StringBuilder buffer = new StringBuilder();
     int[] nextValueIndex = { 0 };
@@ -83,15 +84,31 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
     return fragments.toArray( new String[fragments.size()] );
   }
   
+  @Deprecated
   protected String[] getFieldValues( IndexReader reader, int docId, String fieldName) throws IOException {
     Document doc = reader.document( docId, new MapFieldSelector( new String[]{ fieldName } ) );
     return doc.getValues( fieldName ); // according to Document class javadoc, this never returns null
   }
   
+  protected Field[] getFields( IndexReader reader, int docId, String fieldName) throws IOException {
+    // according to javadoc, doc.getFields(fieldName) cannot be used with lazy loaded field???
+    Document doc = reader.document( docId, new MapFieldSelector( new String[]{ fieldName } ) );
+    return doc.getFields( fieldName ); // according to Document class javadoc, this never returns null
+  }
+  
+  @Deprecated
   protected String makeFragment( StringBuilder buffer, int[] index, String[] values, WeightedFragInfo fragInfo ){
-    StringBuilder fragment = new StringBuilder();
     final int s = fragInfo.startOffset;
-    String src = getFragmentSource( buffer, index, values, s, fragInfo.endOffset );
+    return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s );
+  }
+  
+  protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo ){
+    final int s = fragInfo.startOffset;
+    return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s );
+  }
+  
+  private String makeFragment( WeightedFragInfo fragInfo, String src, int s ){
+    StringBuilder fragment = new StringBuilder();
     int srcIndex = 0;
     for( SubInfo subInfo : fragInfo.subInfos ){
       for( Toffs to : subInfo.termsOffsets ){
@@ -104,6 +121,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
     return fragment.toString();
   }
   
+  @Deprecated
   protected String getFragmentSource( StringBuilder buffer, int[] index, String[] values,
       int startOffset, int endOffset ){
     while( buffer.length() < endOffset && index[0] < values.length ){
@@ -114,6 +132,17 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
     int eo = buffer.length() < endOffset ? buffer.length() : endOffset;
     return buffer.substring( startOffset, eo );
   }
   
+  protected String getFragmentSource( StringBuilder buffer, int[] index, Field[] values,
+      int startOffset, int endOffset ){
+    while( buffer.length() < endOffset && index[0] < values.length ){
+      if( index[0] > 0 && values[index[0]].isTokenized() && values[index[0]].stringValue().length() > 0 )
+        buffer.append( ' ' );
+      buffer.append( values[index[0]++].stringValue() );
+    }
+    int eo = buffer.length() < endOffset ? buffer.length() : endOffset;
+    return buffer.substring( startOffset, eo );
+  }
+  
   protected String getPreTag( int num ){
     return preTags.length > num ? preTags[num] : preTags[0];
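The Field[]-based getFragmentSource above is the core of the fix: it appends a separator space only when the current value is tokenized, so the rebuilt buffer lines up with the offsets the indexer assigned. A minimal standalone sketch of that rule follows (rebuildSource is a hypothetical helper of my own, not Lucene API, and it simplifies the per-value isTokenized() check to a single flag):

// Minimal sketch (assumption: mirrors the separator rule of the new
// getFragmentSource; rebuildSource is a hypothetical helper, not Lucene API).
public class SourceRebuildSketch {

  static String rebuildSource(String[] values, boolean tokenized) {
    StringBuilder buffer = new StringBuilder();
    for (String value : values) {
      // Only tokenized values are indexed with a 1-character offset gap,
      // so only they get a separator in the rebuilt source text.
      if (buffer.length() > 0 && tokenized && value.length() > 0) {
        buffer.append(' ');
      }
      buffer.append(value);
    }
    return buffer.toString();
  }

  public static void main(String[] args) {
    String[] values = { "abc", "defg", "hijkl" };
    System.out.println(rebuildSource(values, false)); // "abcdefghijkl"   (NOT_ANALYZED)
    System.out.println(rebuildSource(values, true));  // "abc defg hijkl" (analyzed)
  }
}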

AbstractTestCase.java:
@@ -24,6 +24,7 @@ import java.util.Collection;
 import junit.framework.TestCase;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.KeywordAnalyzer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
@@ -56,6 +57,7 @@ public abstract class AbstractTestCase extends TestCase {
   protected Directory dir;
   protected Analyzer analyzerW;
   protected Analyzer analyzerB;
+  protected Analyzer analyzerK;
   protected IndexReader reader;
   protected QueryParser paW;
   protected QueryParser paB;
@@ -76,11 +78,18 @@ public abstract class AbstractTestCase extends TestCase {
     "\nLucene/Solr does not require such additional hardware.",
     "\nWhen you talk about processing speed, the"
   };
+  
+  protected static final String[] strMVValues = {
+    "abc",
+    "defg",
+    "hijkl"
+  };
 
   @Override
   protected void setUp() throws Exception {
     analyzerW = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
     analyzerB = new BigramAnalyzer();
+    analyzerK = new KeywordAnalyzer();
     paW = new QueryParser(Version.LUCENE_CURRENT, F, analyzerW );
     paB = new QueryParser(Version.LUCENE_CURRENT, F, analyzerB );
     dir = new RAMDirectory();
@@ -314,6 +323,7 @@ public abstract class AbstractTestCase extends TestCase {
     make1dmfIndex( analyzerB, values );
   }
   
+  // make 1 doc with multi valued field
   protected void make1dmfIndex( Analyzer analyzer, String... values ) throws Exception {
     IndexWriter writer = new IndexWriter( dir, analyzer, true, MaxFieldLength.LIMITED );
     Document doc = new Document();
@@ -325,6 +335,18 @@ public abstract class AbstractTestCase extends TestCase {
     reader = IndexReader.open( dir, true );
   }
   
+  // make 1 doc with multi valued & not analyzed field
+  protected void make1dmfIndexNA( String... values ) throws Exception {
+    IndexWriter writer = new IndexWriter( dir, analyzerK, true, MaxFieldLength.LIMITED );
+    Document doc = new Document();
+    for( String value: values )
+      doc.add( new Field( F, value, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
+    writer.addDocument( doc );
+    writer.close();
+    
+    reader = IndexReader.open( dir, true );
+  }
+  
   protected void makeIndexShortMV() throws Exception {
 
     // 012345
@@ -386,4 +408,18 @@ public abstract class AbstractTestCase extends TestCase {
 
     make1dmfIndexB( biMVValues );
   }
+  
+  protected void makeIndexStrMV() throws Exception {
+
+    //  0123
+    // "abc"
+    
+    //  34567
+    // "defg"
+    
+    //     111
+    //  789012
+    // "hijkl"
+    make1dmfIndexNA( strMVValues );
+  }
 }

SimpleFragmentsBuilderTest.java:
@@ -127,4 +127,16 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
 
     reader = IndexReader.open( dir, true );
   }
+  
+  public void test1StrMV() throws Exception {
+    makeIndexStrMV();
+
+    FieldQuery fq = new FieldQuery( tq( "defg" ), true, true );
+    FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
+    FieldPhraseList fpl = new FieldPhraseList( stack, fq );
+    SimpleFragListBuilder sflb = new SimpleFragListBuilder();
+    FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
+    SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
+    assertEquals( "abc<b>defg</b>hijkl", sfb.createFragment( reader, 0, F, ffl ) );
+  }
 }
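For completeness, a hedged usage sketch (not part of the commit) showing the same check through the public FastVectorHighlighter entry point instead of the builder internals; the class name is illustrative, and the reader is assumed to hold the single multi-valued NOT_ANALYZED document built by makeIndexStrMV.

// Illustrative usage sketch, not part of this commit. Uses the public
// FastVectorHighlighter API; field name and class name are placeholders.
import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
import org.apache.lucene.search.vectorhighlight.FieldQuery;

public class StrMVHighlightExample {

  public static String highlight(IndexReader reader, String fieldName) throws IOException {
    // phraseHighlight = true, fieldMatch = true, matching the test's FieldQuery flags.
    FastVectorHighlighter highlighter = new FastVectorHighlighter(true, true);
    Query query = new TermQuery(new Term(fieldName, "defg"));
    FieldQuery fieldQuery = highlighter.getFieldQuery(query);
    // With the fix, the fragment for doc 0 should read "abc<b>defg</b>hijkl";
    // before it, the tags drifted because of the inserted separator spaces.
    return highlighter.getBestFragment(fieldQuery, reader, 0, fieldName, 100);
  }
}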