From 1b82292e02aa6a85eae34f272d9468f00157c8d0 Mon Sep 17 00:00:00 2001 From: Koji Sekiguchi Date: Tue, 17 Jan 2012 16:43:23 +0000 Subject: [PATCH] LUCENE-3698: FVH adds a multi value separator to the end of the highlighted text git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1232470 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/contrib/CHANGES.txt | 5 +++++ .../vectorhighlight/BaseFragmentsBuilder.java | 7 ++++++- .../ScoreOrderFragmentsBuilderTest.java | 2 +- .../SimpleFragmentsBuilderTest.java | 18 +++++++++--------- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt index 0fa689846b0..5a4a314dd38 100644 --- a/lucene/contrib/CHANGES.txt +++ b/lucene/contrib/CHANGES.txt @@ -105,6 +105,11 @@ Changes in backwards compatibility policy * LUCENE-3626: The internal implementation classes in PKIndexSplitter and MultiPassIndexSplitter were made private as they now work per segment. (Uwe Schindler) + +Changes in Runtime Behavior + + * LUCENE-3698: FastVectorHighlighter no longer adds a multi value separator + to the end of the highlighted text. (Shay Banon via Koji Sekiguchi) New Features diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java index f6b2e61ade6..67b361e051e 100644 --- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java +++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java @@ -166,7 +166,12 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder { buffer.append( values[index[0]++].stringValue() ); buffer.append( getMultiValuedSeparator() ); } - int eo = buffer.length() < endOffset ? buffer.length() : boundaryScanner.findEndOffset( buffer, endOffset ); + int bufferLength = buffer.length(); + // we added the multi value char to the last buffer, ignore it + if (values[index[0] - 1].fieldType().tokenized()) { + bufferLength--; + } + int eo = bufferLength < endOffset ? bufferLength : boundaryScanner.findEndOffset( buffer, endOffset ); modifiedStartOffset[0] = boundaryScanner.findStartOffset( buffer, startOffset ); return buffer.substring( modifiedStartOffset[0], eo ); } diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java index 84dfe4a2060..1e7fe993310 100644 --- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java @@ -35,7 +35,7 @@ public class ScoreOrderFragmentsBuilderTest extends AbstractTestCase { String[] f = sofb.createFragments( reader, 0, F, ffl, 3 ); assertEquals( 3, f.length ); // check score order - assertEquals( "c a a b b ", f[0] ); + assertEquals( "c a a b b", f[0] ); assertEquals( "b b a b a b b b b b c", f[1] ); assertEquals( "a b b b b b b b b b b", f[2] ); } diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java index 2e60cd5a922..9ff5b9805fd 100644 --- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java @@ -37,11 +37,11 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase { public void test1TermIndex() throws Exception { FieldFragList ffl = ffl(new TermQuery(new Term(F, "a")), "a" ); SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); - assertEquals( "a ", sfb.createFragment( reader, 0, F, ffl ) ); + assertEquals( "a", sfb.createFragment( reader, 0, F, ffl ) ); // change tags sfb = new SimpleFragmentsBuilder( new String[]{ "[" }, new String[]{ "]" } ); - assertEquals( "[a] ", sfb.createFragment( reader, 0, F, ffl ) ); + assertEquals( "[a]", sfb.createFragment( reader, 0, F, ffl ) ); } public void test2Frags() throws Exception { @@ -51,7 +51,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase { // 3 snippets requested, but should be 2 assertEquals( 2, f.length ); assertEquals( "a b b b b b b b b b b", f[0] ); - assertEquals( "b b a b a b ", f[1] ); + assertEquals( "b b a b a b", f[1] ); } public void test3Frags() throws Exception { @@ -65,7 +65,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase { assertEquals( 3, f.length ); assertEquals( "a b b b b b b b b b b", f[0] ); assertEquals( "b b a b a b b b b b c", f[1] ); - assertEquals( "c a a b b ", f[2] ); + assertEquals( "c a a b b", f[2] ); } public void testTagsAndEncoder() throws Exception { @@ -73,7 +73,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase { SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); String[] preTags = { "[" }; String[] postTags = { "]" }; - assertEquals( "<h1> [a] </h1> ", + assertEquals( "<h1> [a] </h1>", sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) ); } @@ -94,7 +94,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase { SimpleFragListBuilder sflb = new SimpleFragListBuilder(); FieldFragList ffl = sflb.createFieldFragList( fpl, 100 ); SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); - assertEquals( "a b c d e ", sfb.createFragment( reader, 0, F, ffl ) ); + assertEquals( "a b c d e", sfb.createFragment( reader, 0, F, ffl ) ); } public void test1PhraseLongMV() throws Exception { @@ -119,7 +119,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase { SimpleFragListBuilder sflb = new SimpleFragListBuilder(); FieldFragList ffl = sflb.createFieldFragList( fpl, 100 ); SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); - assertEquals( "processing speed, the ", sfb.createFragment( reader, 0, F, ffl ) ); + assertEquals( "processing speed, the", sfb.createFragment( reader, 0, F, ffl ) ); } public void testUnstoredField() throws Exception { @@ -160,7 +160,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase { FieldFragList ffl = sflb.createFieldFragList( fpl, 100 ); SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); sfb.setMultiValuedSeparator( '/' ); - assertEquals( "abc/defg/hijkl/", sfb.createFragment( reader, 0, F, ffl ) ); + assertEquals( "abc/defg/hijkl", sfb.createFragment( reader, 0, F, ffl ) ); } public void testMVSeparator() throws Exception { @@ -173,6 +173,6 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase { FieldFragList ffl = sflb.createFieldFragList( fpl, 100 ); SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); sfb.setMultiValuedSeparator( '/' ); - assertEquals( " b c//d e/", sfb.createFragment( reader, 0, F, ffl ) ); + assertEquals( " b c//d e", sfb.createFragment( reader, 0, F, ffl ) ); } }