diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 463298bc0d8..1c78069e70c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -108,6 +108,11 @@ Changes in backwards compatibility policy * LUCENE-2600: Remove IndexReader.isDeleted in favor of IndexReader.getDeletedDocs(). (Mike McCandless) +* LUCENE-2529, LUCENE-2668: Position increment gap and offset gap of empty + values in multi-valued field has been changed for some cases in index. + If you index empty fields and uses positions/offsets information on that + fields, reindex is recommended. (David Smiley, Koji Sekiguchi) + Changes in Runtime Behavior * LUCENE-2650: The behavior of FSDirectory.open has changed. On 64-bit diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java index 2c943a95ab4..2d90077d22f 100644 --- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java +++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java @@ -156,8 +156,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder { int startOffset, int endOffset ){ while( buffer.length() < endOffset && index[0] < values.length ){ buffer.append( values[index[0]] ); - if( values[index[0]].length() > 0 && index[0] + 1 < values.length ) - buffer.append( multiValuedSeparator ); + buffer.append( multiValuedSeparator ); index[0]++; } int eo = buffer.length() < endOffset ? buffer.length() : endOffset; @@ -168,7 +167,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder { int startOffset, int endOffset ){ while( buffer.length() < endOffset && index[0] < values.length ){ buffer.append( values[index[0]].stringValue() ); - if( values[index[0]].isTokenized() && values[index[0]].stringValue().length() > 0 && index[0] + 1 < values.length ) + if( values[index[0]].isTokenized() ) buffer.append( multiValuedSeparator ); index[0]++; } diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java index 0e66e232fc3..85f95097c95 100644 --- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java @@ -355,16 +355,20 @@ public abstract class AbstractTestCase extends LuceneTestCase { protected void makeIndexShortMV() throws Exception { + // 0 // "" + // 1 // "" - // 012345 + // 234567 // "a b c" // 0 1 2 - + + // 8 // "" - // 6789 + // 111 + // 9012 // "d e" // 3 4 make1dmfIndex( shortMVValues ); diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java index 1eb89d730b8..a8cedea0ec0 100644 --- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java @@ -165,7 +165,7 @@ public class FieldPhraseListTest extends AbstractTestCase { FieldTermStack stack = new FieldTermStack( reader, 0, F, fq ); FieldPhraseList fpl = new FieldPhraseList( stack, fq ); assertEquals( 1, fpl.phraseList.size() ); - assertEquals( "d(1.0)((6,7))", fpl.phraseList.get( 0 ).toString() ); + assertEquals( "d(1.0)((9,10))", fpl.phraseList.get( 0 ).toString() ); } public void test1PhraseLongMV() throws Exception { diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java index e4343886bc3..e59bc41a082 100644 --- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java @@ -132,7 +132,7 @@ public class FieldTermStackTest extends AbstractTestCase { FieldQuery fq = new FieldQuery( tq( "d" ), true, true ); FieldTermStack stack = new FieldTermStack( reader, 0, F, fq ); assertEquals( 1, stack.termList.size() ); - assertEquals( "d(6,7,3)", stack.pop().toString() ); + assertEquals( "d(9,10,3)", stack.pop().toString() ); } public void test1PhraseLongMV() throws Exception { diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java index 47ca7edf467..31bc37d7e5a 100644 --- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java @@ -27,7 +27,7 @@ public class ScoreOrderFragmentsBuilderTest extends AbstractTestCase { String[] f = sofb.createFragments( reader, 0, F, ffl, 3 ); assertEquals( 3, f.length ); // check score order - assertEquals( "c a a b b", f[0] ); + assertEquals( "c a a b b ", f[0] ); assertEquals( "b b a b a b b b b b ", f[1] ); assertEquals( "a b b b b b b b b b ", f[2] ); } diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java index f756ffbef46..446c645eb61 100644 --- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java @@ -143,7 +143,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase { SimpleFragListBuilder sflb = new SimpleFragListBuilder(); FieldFragList ffl = sflb.createFieldFragList( fpl, 100 ); assertEquals( 1, ffl.fragInfos.size() ); - assertEquals( "subInfos=(d((6,7)))/1.0(0,100)", ffl.fragInfos.get( 0 ).toString() ); + assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.fragInfos.get( 0 ).toString() ); } public void test1PhraseLongMV() throws Exception { diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java index 179bf81da0a..72c17d8ac24 100644 --- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java @@ -34,11 +34,11 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase { public void test1TermIndex() throws Exception { FieldFragList ffl = ffl( "a", "a" ); SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); - assertEquals( "a", sfb.createFragment( reader, 0, F, ffl ) ); + assertEquals( "a ", sfb.createFragment( reader, 0, F, ffl ) ); // change tags sfb = new SimpleFragmentsBuilder( new String[]{ "[" }, new String[]{ "]" } ); - assertEquals( "[a]", sfb.createFragment( reader, 0, F, ffl ) ); + assertEquals( "[a] ", sfb.createFragment( reader, 0, F, ffl ) ); } public void test2Frags() throws Exception { @@ -48,7 +48,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase { // 3 snippets requested, but should be 2 assertEquals( 2, f.length ); assertEquals( "a b b b b b b b b b ", f[0] ); - assertEquals( "b b a b a b", f[1] ); + assertEquals( "b b a b a b ", f[1] ); } public void test3Frags() throws Exception { @@ -58,7 +58,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase { assertEquals( 3, f.length ); assertEquals( "a b b b b b b b b b ", f[0] ); assertEquals( "b b a b a b b b b b ", f[1] ); - assertEquals( "c a a b b", f[2] ); + assertEquals( "c a a b b ", f[2] ); } public void testTagsAndEncoder() throws Exception { @@ -66,7 +66,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase { SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); String[] preTags = { "[" }; String[] postTags = { "]" }; - assertEquals( "<h1> [a] </h1>", + assertEquals( "<h1> [a] </h1> ", sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) ); } @@ -88,7 +88,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase { SimpleFragListBuilder sflb = new SimpleFragListBuilder(); FieldFragList ffl = sflb.createFieldFragList( fpl, 100 ); SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); - assertEquals( "a b c d e", sfb.createFragment( reader, 0, F, ffl ) ); + assertEquals( " b c d e ", sfb.createFragment( reader, 0, F, ffl ) ); } public void test1PhraseLongMV() throws Exception { @@ -113,7 +113,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase { SimpleFragListBuilder sflb = new SimpleFragListBuilder(); FieldFragList ffl = sflb.createFieldFragList( fpl, 100 ); SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); - assertEquals( "ssing speed, the", sfb.createFragment( reader, 0, F, ffl ) ); + assertEquals( "ssing speed, the ", sfb.createFragment( reader, 0, F, ffl ) ); } public void testUnstoredField() throws Exception { @@ -163,6 +163,6 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase { FieldFragList ffl = sflb.createFieldFragList( fpl, 100 ); SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); sfb.setMultiValuedSeparator( '/' ); - assertEquals( "a b c/d e", sfb.createFragment( reader, 0, F, ffl ) ); + assertEquals( " b c//d e/", sfb.createFragment( reader, 0, F, ffl ) ); } } diff --git a/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java b/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java index 41d4db34cfd..7d66197514b 100644 --- a/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java +++ b/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java @@ -75,10 +75,8 @@ final class DocInverterPerField extends DocFieldConsumerPerField { // consumer if it wants to see this particular field // tokenized. if (field.isIndexed() && doInvert) { - - final boolean anyToken; - if (fieldState.length > 0) + if (i > 0) fieldState.position += docState.analyzer.getPositionIncrementGap(fieldInfo.name); if (!field.isTokenized()) { // un-tokenized field @@ -99,7 +97,6 @@ final class DocInverterPerField extends DocFieldConsumerPerField { fieldState.offset += valueLength; fieldState.length++; fieldState.position++; - anyToken = valueLength > 0; } else { // tokenized field final TokenStream stream; final TokenStream streamValue = field.tokenStreamValue(); @@ -189,14 +186,12 @@ final class DocInverterPerField extends DocFieldConsumerPerField { stream.end(); fieldState.offset += offsetAttribute.endOffset(); - anyToken = fieldState.length > startLength; } finally { stream.close(); } } - if (anyToken) - fieldState.offset += docState.analyzer.getOffsetGap(field); + fieldState.offset += docState.analyzer.getOffsetGap(field); fieldState.boost *= field.getBoost(); } diff --git a/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java b/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java index 8a2d76c7444..0c8f0306678 100644 --- a/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java +++ b/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java @@ -30,6 +30,7 @@ public final class MockAnalyzer extends Analyzer { private final boolean lowerCase; private final CharacterRunAutomaton filter; private final boolean enablePositionIncrements; + private int positionIncrementGap; /** * Creates a new MockAnalyzer. @@ -89,4 +90,13 @@ public final class MockAnalyzer extends Analyzer { return saved.filter; } } + + public void setPositionIncrementGap(int positionIncrementGap){ + this.positionIncrementGap = positionIncrementGap; + } + + @Override + public int getPositionIncrementGap(String fieldName){ + return positionIncrementGap; + } } \ No newline at end of file diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java index d4e53ecf664..f07bd88e037 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -4266,11 +4266,11 @@ public class TestIndexWriter extends LuceneTestCase { TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field")); TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0); assertEquals(1, termOffsets.length); - assertEquals(0, termOffsets[0].getStartOffset()); - assertEquals(6, termOffsets[0].getEndOffset()); + assertEquals(1, termOffsets[0].getStartOffset()); + assertEquals(7, termOffsets[0].getEndOffset()); termOffsets = tpv.getOffsets(1); - assertEquals(7, termOffsets[0].getStartOffset()); - assertEquals(10, termOffsets[0].getEndOffset()); + assertEquals(8, termOffsets[0].getStartOffset()); + assertEquals(11, termOffsets[0].getEndOffset()); r.close(); dir.close(); } @@ -4301,8 +4301,37 @@ public class TestIndexWriter extends LuceneTestCase { assertEquals(0, termOffsets[0].getStartOffset()); assertEquals(4, termOffsets[0].getEndOffset()); termOffsets = tpv.getOffsets(1); - assertEquals(5, termOffsets[0].getStartOffset()); - assertEquals(11, termOffsets[0].getEndOffset()); + assertEquals(6, termOffsets[0].getStartOffset()); + assertEquals(12, termOffsets[0].getEndOffset()); + r.close(); + dir.close(); + } + + // LUCENE-2529 + public void testPositionIncrementGapEmptyField() throws Exception { + Directory dir = newDirectory(); + MockAnalyzer analyzer = new MockAnalyzer(); + analyzer.setPositionIncrementGap( 100 ); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, analyzer)); + Document doc = new Document(); + Field f = newField("field", "", Field.Store.NO, + Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS); + Field f2 = newField("field", "crunch man", Field.Store.NO, + Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS); + doc.add(f); + doc.add(f2); + w.addDocument(doc); + w.close(); + + IndexReader r = IndexReader.open(dir, true); + TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field")); + int[] poss = tpv.getTermPositions(0); + assertEquals(1, poss.length); + assertEquals(100, poss[0]); + poss = tpv.getTermPositions(1); + assertEquals(1, poss.length); + assertEquals(101, poss[0]); r.close(); dir.close(); }