diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt index bedd2f5d96a..c857d3bea31 100644 --- a/lucene/contrib/CHANGES.txt +++ b/lucene/contrib/CHANGES.txt @@ -112,6 +112,10 @@ API Changes such as stopword lists and any language-specific processing in addition to stemming. Add Turkish and Romanian stopwords lists to support this. (Robert Muir, Uwe Schindler, Simon Willnauer) + + * LUCENE-2603: Add setMultiValuedSeparator(char) method to set an arbitrary + char that is used when concatenating multiValued data. Default is a space + (' '). It is applied on ANALYZED field only. (Koji Sekiguchi) New features diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java index 6167a1c55d4..d215a144bdc 100644 --- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java +++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java @@ -44,6 +44,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder { "", "" }; public static final String[] COLORED_POST_TAGS = { "" }; + private char multiValuedSeparator = ' '; protected BaseFragmentsBuilder(){ this( new String[]{ "" }, new String[]{ "" } ); @@ -155,7 +156,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder { int startOffset, int endOffset ){ while( buffer.length() < endOffset && index[0] < values.length ){ if( index[0] > 0 && values[index[0]].length() > 0 ) - buffer.append( ' ' ); + buffer.append( multiValuedSeparator ); buffer.append( values[index[0]++] ); } int eo = buffer.length() < endOffset ? buffer.length() : endOffset; @@ -166,12 +167,20 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder { int startOffset, int endOffset ){ while( buffer.length() < endOffset && index[0] < values.length ){ if( index[0] > 0 && values[index[0]].isTokenized() && values[index[0]].stringValue().length() > 0 ) - buffer.append( ' ' ); + buffer.append( multiValuedSeparator ); buffer.append( values[index[0]++].stringValue() ); } int eo = buffer.length() < endOffset ? buffer.length() : endOffset; return buffer.substring( startOffset, eo ); } + + public void setMultiValuedSeparator( char separator ){ + multiValuedSeparator = separator; + } + + public char getMultiValuedSeparator(){ + return multiValuedSeparator; + } protected String getPreTag( int num ){ return getPreTag( preTags, num ); diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java index 73d0c9c736b..179bf81da0a 100644 --- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java @@ -148,6 +148,21 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase { SimpleFragListBuilder sflb = new SimpleFragListBuilder(); FieldFragList ffl = sflb.createFieldFragList( fpl, 100 ); SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); + // '/' separator doesn't effect the snippet because of NOT_ANALYZED field + sfb.setMultiValuedSeparator( '/' ); assertEquals( "abcdefghijkl", sfb.createFragment( reader, 0, F, ffl ) ); } + + public void testMVSeparator() throws Exception { + makeIndexShortMV(); + + FieldQuery fq = new FieldQuery( tq( "d" ), true, true ); + FieldTermStack stack = new FieldTermStack( reader, 0, F, fq ); + FieldPhraseList fpl = new FieldPhraseList( stack, fq ); + SimpleFragListBuilder sflb = new SimpleFragListBuilder(); + FieldFragList ffl = sflb.createFieldFragList( fpl, 100 ); + SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); + sfb.setMultiValuedSeparator( '/' ); + assertEquals( "a b c/d e", sfb.createFragment( reader, 0, F, ffl ) ); + } }