LUCENE-2278: FastVectorHighlighter: highlighted term is out of alignment in multi-valued NOT_ANALYZED field

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@916090 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Koji Sekiguchi 2010-02-25 02:24:31 +00:00
parent 4eef6adaaf
commit c831eb9c14
4 changed files with 83 additions and 3 deletions

View File

@ -48,6 +48,9 @@ Bug fixes
the previous code did not always implement the Snowball algorithm correctly.
Additionally, for Version > 3.0, the Snowball stopword lists are used by
default. (Robert Muir, Uwe Schindler, Simon Willnauer)
* LUCENE-2278: FastVectorHighlighter: Highlighted term is out of alignment
in multi-valued NOT_ANALYZED field. (Koji Sekiguchi)
API Changes

View File

@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.MapFieldSelector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
@ -72,7 +73,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
List<WeightedFragInfo> fragInfos = getWeightedFragInfoList( fieldFragList.fragInfos );
List<String> fragments = new ArrayList<String>( maxNumFragments );
String[] values = getFieldValues( reader, docId, fieldName );
Field[] values = getFields( reader, docId, fieldName );
if( values.length == 0 ) return null;
StringBuilder buffer = new StringBuilder();
int[] nextValueIndex = { 0 };
@ -83,15 +84,31 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
return fragments.toArray( new String[fragments.size()] );
}
@Deprecated
protected String[] getFieldValues( IndexReader reader, int docId, String fieldName) throws IOException {
Document doc = reader.document( docId, new MapFieldSelector( new String[]{ fieldName } ) );
return doc.getValues( fieldName ); // according to Document class javadoc, this never returns null
}
protected Field[] getFields( IndexReader reader, int docId, String fieldName) throws IOException {
// according to javadoc, doc.getFields(fieldName) cannot be used with lazy loaded field???
Document doc = reader.document( docId, new MapFieldSelector( new String[]{ fieldName } ) );
return doc.getFields( fieldName ); // according to Document class javadoc, this never returns null
}
@Deprecated
protected String makeFragment( StringBuilder buffer, int[] index, String[] values, WeightedFragInfo fragInfo ){
StringBuilder fragment = new StringBuilder();
final int s = fragInfo.startOffset;
String src = getFragmentSource( buffer, index, values, s, fragInfo.endOffset );
return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s );
}
protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo ){
final int s = fragInfo.startOffset;
return makeFragment( fragInfo, getFragmentSource( buffer, index, values, s, fragInfo.endOffset ), s );
}
private String makeFragment( WeightedFragInfo fragInfo, String src, int s ){
StringBuilder fragment = new StringBuilder();
int srcIndex = 0;
for( SubInfo subInfo : fragInfo.subInfos ){
for( Toffs to : subInfo.termsOffsets ){
@ -104,6 +121,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
return fragment.toString();
}
@Deprecated
protected String getFragmentSource( StringBuilder buffer, int[] index, String[] values,
int startOffset, int endOffset ){
while( buffer.length() < endOffset && index[0] < values.length ){
@ -114,6 +132,17 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
int eo = buffer.length() < endOffset ? buffer.length() : endOffset;
return buffer.substring( startOffset, eo );
}
protected String getFragmentSource( StringBuilder buffer, int[] index, Field[] values,
int startOffset, int endOffset ){
while( buffer.length() < endOffset && index[0] < values.length ){
if( index[0] > 0 && values[index[0]].isTokenized() && values[index[0]].stringValue().length() > 0 )
buffer.append( ' ' );
buffer.append( values[index[0]++].stringValue() );
}
int eo = buffer.length() < endOffset ? buffer.length() : endOffset;
return buffer.substring( startOffset, eo );
}
protected String getPreTag( int num ){
return preTags.length > num ? preTags[num] : preTags[0];

View File

@ -24,6 +24,7 @@ import java.util.Collection;
import junit.framework.TestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@ -56,6 +57,7 @@ public abstract class AbstractTestCase extends TestCase {
protected Directory dir;
protected Analyzer analyzerW;
protected Analyzer analyzerB;
protected Analyzer analyzerK;
protected IndexReader reader;
protected QueryParser paW;
protected QueryParser paB;
@ -76,11 +78,18 @@ public abstract class AbstractTestCase extends TestCase {
"\nLucene/Solr does not require such additional hardware.",
"\nWhen you talk about processing speed, the"
};
protected static final String[] strMVValues = {
"abc",
"defg",
"hijkl"
};
@Override
protected void setUp() throws Exception {
analyzerW = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
analyzerB = new BigramAnalyzer();
analyzerK = new KeywordAnalyzer();
paW = new QueryParser(Version.LUCENE_CURRENT, F, analyzerW );
paB = new QueryParser(Version.LUCENE_CURRENT, F, analyzerB );
dir = new RAMDirectory();
@ -314,6 +323,7 @@ public abstract class AbstractTestCase extends TestCase {
make1dmfIndex( analyzerB, values );
}
// make 1 doc with multi valued field
protected void make1dmfIndex( Analyzer analyzer, String... values ) throws Exception {
IndexWriter writer = new IndexWriter( dir, analyzer, true, MaxFieldLength.LIMITED );
Document doc = new Document();
@ -325,6 +335,18 @@ public abstract class AbstractTestCase extends TestCase {
reader = IndexReader.open( dir, true );
}
// make 1 doc with multi valued & not analyzed field
protected void make1dmfIndexNA( String... values ) throws Exception {
IndexWriter writer = new IndexWriter( dir, analyzerK, true, MaxFieldLength.LIMITED );
Document doc = new Document();
for( String value: values )
doc.add( new Field( F, value, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
writer.addDocument( doc );
writer.close();
reader = IndexReader.open( dir, true );
}
protected void makeIndexShortMV() throws Exception {
// 012345
@ -386,4 +408,18 @@ public abstract class AbstractTestCase extends TestCase {
make1dmfIndexB( biMVValues );
}
protected void makeIndexStrMV() throws Exception {
// 0123
// "abc"
// 34567
// "defg"
// 111
// 789012
// "hijkl"
make1dmfIndexNA( strMVValues );
}
}

View File

@ -127,4 +127,16 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
reader = IndexReader.open( dir, true );
}
public void test1StrMV() throws Exception {
makeIndexStrMV();
FieldQuery fq = new FieldQuery( tq( "defg" ), true, true );
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
assertEquals( "abc<b>defg</b>hijkl", sfb.createFragment( reader, 0, F, ffl ) );
}
}