LUCENE-2529, LUCENE-2668: always apply position increment gap and offset gap between values

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1001796 13f79535-47bb-0310-9956-ffa450edef68
Koji Sekiguchi 2010-09-27 16:10:29 +00:00
parent 87496c7a6d
commit 13ea7a89c7
11 changed files with 73 additions and 31 deletions

CHANGES.txt

@@ -108,6 +108,11 @@ Changes in backwards compatibility policy
* LUCENE-2600: Remove IndexReader.isDeleted in favor of
IndexReader.getDeletedDocs(). (Mike McCandless)
+ * LUCENE-2529, LUCENE-2668: The position increment gap and offset gap of empty
+ values in a multi-valued field have changed for some cases in the index.
+ If you index empty fields and use positions/offsets information on those
+ fields, reindexing is recommended. (David Smiley, Koji Sekiguchi)
Changes in Runtime Behavior
* LUCENE-2650: The behavior of FSDirectory.open has changed. On 64-bit
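
To see what this entry means in practice, here is a condensed sketch of the new behavior, boiled down from TestIndexWriter.testPositionIncrementGapEmptyField() added at the end of this commit (MockAnalyzer and the newField(...) helper come from Lucene's test framework; the gap of 100 is just the value that test uses):

    MockAnalyzer analyzer = new MockAnalyzer();
    analyzer.setPositionIncrementGap(100);  // setter added to MockAnalyzer in this commit

    Document doc = new Document();
    doc.add(newField("field", "",           Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
    doc.add(newField("field", "crunch man", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));

    // Before this commit the empty first value produced no tokens, so no gap was
    // applied and "crunch" was indexed at position 0, "man" at position 1.
    // After this commit the gap is applied between every pair of values, so
    // "crunch" lands at position 100 and "man" at 101, as the new test asserts.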

BaseFragmentsBuilder.java

@@ -156,8 +156,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
int startOffset, int endOffset ){
while( buffer.length() < endOffset && index[0] < values.length ){
buffer.append( values[index[0]] );
- if( values[index[0]].length() > 0 && index[0] + 1 < values.length )
- buffer.append( multiValuedSeparator );
+ buffer.append( multiValuedSeparator );
index[0]++;
}
int eo = buffer.length() < endOffset ? buffer.length() : endOffset;
@@ -168,7 +167,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
int startOffset, int endOffset ){
while( buffer.length() < endOffset && index[0] < values.length ){
buffer.append( values[index[0]].stringValue() );
- if( values[index[0]].isTokenized() && values[index[0]].stringValue().length() > 0 && index[0] + 1 < values.length )
+ if( values[index[0]].isTokenized() )
buffer.append( multiValuedSeparator );
index[0]++;
}
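
The net effect of the two hunks above: the multi-valued separator is now appended after every value (in the Fieldable variant, after every tokenized value), so empty values still contribute one separator character and the rebuilt fragment source stays aligned with the offsets the indexer now writes. A small worked illustration, assuming the shortMV fixture whose offsets are sketched in AbstractTestCase below (apparently "", "", "a b c", "", "d e") and the default ' ' separator:

    // buffer is built value by value, one separator appended after each value:
    //   "" + " "  +  "" + " "  +  "a b c" + " "  +  "" + " "  +  "d e" + " "
    //   = "  a b c  d e "
    // so "a b c" occupies offsets 2..7 and "d" sits at (9,10), matching the
    // updated expectations below, e.g. d(9,10) in FieldPhraseListTest and
    // subInfos=(d((9,10))) in SimpleFragListBuilderTest.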

AbstractTestCase.java

@@ -355,16 +355,20 @@ public abstract class AbstractTestCase extends LuceneTestCase {
protected void makeIndexShortMV() throws Exception {
+ // 0
// ""
+ // 1
// ""
- // 012345
+ // 234567
// "a b c"
// 0 1 2
+ // 8
// ""
- // 6789
+ // 111
+ // 9012
// "d e"
// 3 4
make1dmfIndex( shortMVValues );

FieldPhraseListTest.java

@@ -165,7 +165,7 @@ public class FieldPhraseListTest extends AbstractTestCase {
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
assertEquals( 1, fpl.phraseList.size() );
- assertEquals( "d(1.0)((6,7))", fpl.phraseList.get( 0 ).toString() );
+ assertEquals( "d(1.0)((9,10))", fpl.phraseList.get( 0 ).toString() );
}
public void test1PhraseLongMV() throws Exception {

FieldTermStackTest.java

@@ -132,7 +132,7 @@ public class FieldTermStackTest extends AbstractTestCase {
FieldQuery fq = new FieldQuery( tq( "d" ), true, true );
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
assertEquals( 1, stack.termList.size() );
- assertEquals( "d(6,7,3)", stack.pop().toString() );
+ assertEquals( "d(9,10,3)", stack.pop().toString() );
}
public void test1PhraseLongMV() throws Exception {

ScoreOrderFragmentsBuilderTest.java

@@ -27,7 +27,7 @@ public class ScoreOrderFragmentsBuilderTest extends AbstractTestCase {
String[] f = sofb.createFragments( reader, 0, F, ffl, 3 );
assertEquals( 3, f.length );
// check score order
- assertEquals( "<b>c</b> <b>a</b> <b>a</b> b b", f[0] );
+ assertEquals( "<b>c</b> <b>a</b> <b>a</b> b b ", f[0] );
assertEquals( "b b <b>a</b> b <b>a</b> b b b b b ", f[1] );
assertEquals( "<b>a</b> b b b b b b b b b ", f[2] );
}

SimpleFragListBuilderTest.java

@@ -143,7 +143,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
assertEquals( 1, ffl.fragInfos.size() );
- assertEquals( "subInfos=(d((6,7)))/1.0(0,100)", ffl.fragInfos.get( 0 ).toString() );
+ assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.fragInfos.get( 0 ).toString() );
}
public void test1PhraseLongMV() throws Exception {

SimpleFragmentsBuilderTest.java

@@ -34,11 +34,11 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
public void test1TermIndex() throws Exception {
FieldFragList ffl = ffl( "a", "a" );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
- assertEquals( "<b>a</b>", sfb.createFragment( reader, 0, F, ffl ) );
+ assertEquals( "<b>a</b> ", sfb.createFragment( reader, 0, F, ffl ) );
// change tags
sfb = new SimpleFragmentsBuilder( new String[]{ "[" }, new String[]{ "]" } );
- assertEquals( "[a]", sfb.createFragment( reader, 0, F, ffl ) );
+ assertEquals( "[a] ", sfb.createFragment( reader, 0, F, ffl ) );
}
public void test2Frags() throws Exception {
@@ -48,7 +48,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
// 3 snippets requested, but should be 2
assertEquals( 2, f.length );
assertEquals( "<b>a</b> b b b b b b b b b ", f[0] );
- assertEquals( "b b <b>a</b> b <b>a</b> b", f[1] );
+ assertEquals( "b b <b>a</b> b <b>a</b> b ", f[1] );
}
public void test3Frags() throws Exception {
@@ -58,7 +58,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
assertEquals( 3, f.length );
assertEquals( "<b>a</b> b b b b b b b b b ", f[0] );
assertEquals( "b b <b>a</b> b <b>a</b> b b b b b ", f[1] );
- assertEquals( "<b>c</b> <b>a</b> <b>a</b> b b", f[2] );
+ assertEquals( "<b>c</b> <b>a</b> <b>a</b> b b ", f[2] );
}
public void testTagsAndEncoder() throws Exception {
@@ -66,7 +66,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
String[] preTags = { "[" };
String[] postTags = { "]" };
- assertEquals( "&lt;h1&gt; [a] &lt;/h1&gt;",
+ assertEquals( "&lt;h1&gt; [a] &lt;/h1&gt; ",
sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) );
}
@@ -88,7 +88,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
- assertEquals( "a b c <b>d</b> e", sfb.createFragment( reader, 0, F, ffl ) );
+ assertEquals( " b c <b>d</b> e ", sfb.createFragment( reader, 0, F, ffl ) );
}
public void test1PhraseLongMV() throws Exception {
@@ -113,7 +113,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
- assertEquals( "ssing <b>speed</b>, the", sfb.createFragment( reader, 0, F, ffl ) );
+ assertEquals( "ssing <b>speed</b>, the ", sfb.createFragment( reader, 0, F, ffl ) );
}
public void testUnstoredField() throws Exception {
@@ -163,6 +163,6 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
sfb.setMultiValuedSeparator( '/' );
- assertEquals( "a b c/<b>d</b> e", sfb.createFragment( reader, 0, F, ffl ) );
+ assertEquals( " b c//<b>d</b> e/", sfb.createFragment( reader, 0, F, ffl ) );
}
}

DocInverterPerField.java

@@ -75,10 +75,8 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
// consumer if it wants to see this particular field
// tokenized.
if (field.isIndexed() && doInvert) {
- final boolean anyToken;
- if (fieldState.length > 0)
+ if (i > 0)
fieldState.position += docState.analyzer.getPositionIncrementGap(fieldInfo.name);
if (!field.isTokenized()) { // un-tokenized field
@@ -99,7 +97,6 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
fieldState.offset += valueLength;
fieldState.length++;
fieldState.position++;
- anyToken = valueLength > 0;
} else { // tokenized field
final TokenStream stream;
final TokenStream streamValue = field.tokenStreamValue();
@@ -189,14 +186,12 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
stream.end();
fieldState.offset += offsetAttribute.endOffset();
- anyToken = fieldState.length > startLength;
} finally {
stream.close();
}
}
- if (anyToken)
- fieldState.offset += docState.analyzer.getOffsetGap(field);
+ fieldState.offset += docState.analyzer.getOffsetGap(field);
fieldState.boost *= field.getBoost();
}

MockAnalyzer.java

@@ -30,6 +30,7 @@ public final class MockAnalyzer extends Analyzer {
private final boolean lowerCase;
private final CharacterRunAutomaton filter;
private final boolean enablePositionIncrements;
+ private int positionIncrementGap;
/**
* Creates a new MockAnalyzer.
@@ -89,4 +90,13 @@ public final class MockAnalyzer extends Analyzer {
return saved.filter;
}
}
+ public void setPositionIncrementGap(int positionIncrementGap){
+ this.positionIncrementGap = positionIncrementGap;
+ }
+ @Override
+ public int getPositionIncrementGap(String fieldName){
+ return positionIncrementGap;
+ }
}

TestIndexWriter.java

@@ -4266,11 +4266,11 @@ public class TestIndexWriter extends LuceneTestCase {
TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field"));
TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0);
assertEquals(1, termOffsets.length);
- assertEquals(0, termOffsets[0].getStartOffset());
- assertEquals(6, termOffsets[0].getEndOffset());
+ assertEquals(1, termOffsets[0].getStartOffset());
+ assertEquals(7, termOffsets[0].getEndOffset());
termOffsets = tpv.getOffsets(1);
- assertEquals(7, termOffsets[0].getStartOffset());
- assertEquals(10, termOffsets[0].getEndOffset());
+ assertEquals(8, termOffsets[0].getStartOffset());
+ assertEquals(11, termOffsets[0].getEndOffset());
r.close();
dir.close();
}
@@ -4301,8 +4301,37 @@ public class TestIndexWriter extends LuceneTestCase {
assertEquals(0, termOffsets[0].getStartOffset());
assertEquals(4, termOffsets[0].getEndOffset());
termOffsets = tpv.getOffsets(1);
- assertEquals(5, termOffsets[0].getStartOffset());
- assertEquals(11, termOffsets[0].getEndOffset());
+ assertEquals(6, termOffsets[0].getStartOffset());
+ assertEquals(12, termOffsets[0].getEndOffset());
r.close();
dir.close();
}
+ // LUCENE-2529
+ public void testPositionIncrementGapEmptyField() throws Exception {
+ Directory dir = newDirectory();
+ MockAnalyzer analyzer = new MockAnalyzer();
+ analyzer.setPositionIncrementGap( 100 );
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
+ TEST_VERSION_CURRENT, analyzer));
+ Document doc = new Document();
+ Field f = newField("field", "", Field.Store.NO,
+ Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS);
+ Field f2 = newField("field", "crunch man", Field.Store.NO,
+ Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS);
+ doc.add(f);
+ doc.add(f2);
+ w.addDocument(doc);
+ w.close();
+ IndexReader r = IndexReader.open(dir, true);
+ TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field"));
+ int[] poss = tpv.getTermPositions(0);
+ assertEquals(1, poss.length);
+ assertEquals(100, poss[0]);
+ poss = tpv.getTermPositions(1);
+ assertEquals(1, poss.length);
+ assertEquals(101, poss[0]);
+ r.close();
+ dir.close();
+ }