mirror of https://github.com/apache/lucene.git
LUCENE-2529, LUCENE-2668: always apply position increment gap and offset gap between values
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1001796 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
87496c7a6d
commit
13ea7a89c7
|
@ -108,6 +108,11 @@ Changes in backwards compatibility policy
|
|||
* LUCENE-2600: Remove IndexReader.isDeleted in favor of
|
||||
IndexReader.getDeletedDocs(). (Mike McCandless)
|
||||
|
||||
* LUCENE-2529, LUCENE-2668: Position increment gap and offset gap of empty
|
||||
values in multi-valued fields have been changed for some cases in the index.
|
||||
If you index empty fields and use positions/offsets information on those
|
||||
fields, reindexing is recommended. (David Smiley, Koji Sekiguchi)
|
||||
|
||||
Changes in Runtime Behavior
|
||||
|
||||
* LUCENE-2650: The behavior of FSDirectory.open has changed. On 64-bit
|
||||
|
|
|
@ -156,8 +156,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
|
|||
int startOffset, int endOffset ){
|
||||
while( buffer.length() < endOffset && index[0] < values.length ){
|
||||
buffer.append( values[index[0]] );
|
||||
if( values[index[0]].length() > 0 && index[0] + 1 < values.length )
|
||||
buffer.append( multiValuedSeparator );
|
||||
buffer.append( multiValuedSeparator );
|
||||
index[0]++;
|
||||
}
|
||||
int eo = buffer.length() < endOffset ? buffer.length() : endOffset;
|
||||
|
@ -168,7 +167,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
|
|||
int startOffset, int endOffset ){
|
||||
while( buffer.length() < endOffset && index[0] < values.length ){
|
||||
buffer.append( values[index[0]].stringValue() );
|
||||
if( values[index[0]].isTokenized() && values[index[0]].stringValue().length() > 0 && index[0] + 1 < values.length )
|
||||
if( values[index[0]].isTokenized() )
|
||||
buffer.append( multiValuedSeparator );
|
||||
index[0]++;
|
||||
}
|
||||
|
|
|
@ -355,16 +355,20 @@ public abstract class AbstractTestCase extends LuceneTestCase {
|
|||
|
||||
protected void makeIndexShortMV() throws Exception {
|
||||
|
||||
// 0
|
||||
// ""
|
||||
// 1
|
||||
// ""
|
||||
|
||||
// 012345
|
||||
// 234567
|
||||
// "a b c"
|
||||
// 0 1 2
|
||||
|
||||
// 8
|
||||
// ""
|
||||
|
||||
// 6789
|
||||
// 111
|
||||
// 9012
|
||||
// "d e"
|
||||
// 3 4
|
||||
make1dmfIndex( shortMVValues );
|
||||
|
|
|
@ -165,7 +165,7 @@ public class FieldPhraseListTest extends AbstractTestCase {
|
|||
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
|
||||
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
|
||||
assertEquals( 1, fpl.phraseList.size() );
|
||||
assertEquals( "d(1.0)((6,7))", fpl.phraseList.get( 0 ).toString() );
|
||||
assertEquals( "d(1.0)((9,10))", fpl.phraseList.get( 0 ).toString() );
|
||||
}
|
||||
|
||||
public void test1PhraseLongMV() throws Exception {
|
||||
|
|
|
@ -132,7 +132,7 @@ public class FieldTermStackTest extends AbstractTestCase {
|
|||
FieldQuery fq = new FieldQuery( tq( "d" ), true, true );
|
||||
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
|
||||
assertEquals( 1, stack.termList.size() );
|
||||
assertEquals( "d(6,7,3)", stack.pop().toString() );
|
||||
assertEquals( "d(9,10,3)", stack.pop().toString() );
|
||||
}
|
||||
|
||||
public void test1PhraseLongMV() throws Exception {
|
||||
|
|
|
@ -27,7 +27,7 @@ public class ScoreOrderFragmentsBuilderTest extends AbstractTestCase {
|
|||
String[] f = sofb.createFragments( reader, 0, F, ffl, 3 );
|
||||
assertEquals( 3, f.length );
|
||||
// check score order
|
||||
assertEquals( "<b>c</b> <b>a</b> <b>a</b> b b", f[0] );
|
||||
assertEquals( "<b>c</b> <b>a</b> <b>a</b> b b ", f[0] );
|
||||
assertEquals( "b b <b>a</b> b <b>a</b> b b b b b ", f[1] );
|
||||
assertEquals( "<b>a</b> b b b b b b b b b ", f[2] );
|
||||
}
|
||||
|
|
|
@ -143,7 +143,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
|||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
|
||||
assertEquals( 1, ffl.fragInfos.size() );
|
||||
assertEquals( "subInfos=(d((6,7)))/1.0(0,100)", ffl.fragInfos.get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.fragInfos.get( 0 ).toString() );
|
||||
}
|
||||
|
||||
public void test1PhraseLongMV() throws Exception {
|
||||
|
|
|
@ -34,11 +34,11 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
|||
public void test1TermIndex() throws Exception {
|
||||
FieldFragList ffl = ffl( "a", "a" );
|
||||
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
||||
assertEquals( "<b>a</b>", sfb.createFragment( reader, 0, F, ffl ) );
|
||||
assertEquals( "<b>a</b> ", sfb.createFragment( reader, 0, F, ffl ) );
|
||||
|
||||
// change tags
|
||||
sfb = new SimpleFragmentsBuilder( new String[]{ "[" }, new String[]{ "]" } );
|
||||
assertEquals( "[a]", sfb.createFragment( reader, 0, F, ffl ) );
|
||||
assertEquals( "[a] ", sfb.createFragment( reader, 0, F, ffl ) );
|
||||
}
|
||||
|
||||
public void test2Frags() throws Exception {
|
||||
|
@ -48,7 +48,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
|||
// 3 snippets requested, but should be 2
|
||||
assertEquals( 2, f.length );
|
||||
assertEquals( "<b>a</b> b b b b b b b b b ", f[0] );
|
||||
assertEquals( "b b <b>a</b> b <b>a</b> b", f[1] );
|
||||
assertEquals( "b b <b>a</b> b <b>a</b> b ", f[1] );
|
||||
}
|
||||
|
||||
public void test3Frags() throws Exception {
|
||||
|
@ -58,7 +58,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
|||
assertEquals( 3, f.length );
|
||||
assertEquals( "<b>a</b> b b b b b b b b b ", f[0] );
|
||||
assertEquals( "b b <b>a</b> b <b>a</b> b b b b b ", f[1] );
|
||||
assertEquals( "<b>c</b> <b>a</b> <b>a</b> b b", f[2] );
|
||||
assertEquals( "<b>c</b> <b>a</b> <b>a</b> b b ", f[2] );
|
||||
}
|
||||
|
||||
public void testTagsAndEncoder() throws Exception {
|
||||
|
@ -66,7 +66,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
|||
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
||||
String[] preTags = { "[" };
|
||||
String[] postTags = { "]" };
|
||||
assertEquals( "<h1> [a] </h1>",
|
||||
assertEquals( "<h1> [a] </h1> ",
|
||||
sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) );
|
||||
}
|
||||
|
||||
|
@ -88,7 +88,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
|||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
|
||||
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
||||
assertEquals( "a b c <b>d</b> e", sfb.createFragment( reader, 0, F, ffl ) );
|
||||
assertEquals( " b c <b>d</b> e ", sfb.createFragment( reader, 0, F, ffl ) );
|
||||
}
|
||||
|
||||
public void test1PhraseLongMV() throws Exception {
|
||||
|
@ -113,7 +113,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
|||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
|
||||
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
||||
assertEquals( "ssing <b>speed</b>, the", sfb.createFragment( reader, 0, F, ffl ) );
|
||||
assertEquals( "ssing <b>speed</b>, the ", sfb.createFragment( reader, 0, F, ffl ) );
|
||||
}
|
||||
|
||||
public void testUnstoredField() throws Exception {
|
||||
|
@ -163,6 +163,6 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
|||
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
|
||||
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
||||
sfb.setMultiValuedSeparator( '/' );
|
||||
assertEquals( "a b c/<b>d</b> e", sfb.createFragment( reader, 0, F, ffl ) );
|
||||
assertEquals( " b c//<b>d</b> e/", sfb.createFragment( reader, 0, F, ffl ) );
|
||||
}
|
||||
}
|
||||
|
|
|
@ -76,9 +76,7 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
|
|||
// tokenized.
|
||||
if (field.isIndexed() && doInvert) {
|
||||
|
||||
final boolean anyToken;
|
||||
|
||||
if (fieldState.length > 0)
|
||||
if (i > 0)
|
||||
fieldState.position += docState.analyzer.getPositionIncrementGap(fieldInfo.name);
|
||||
|
||||
if (!field.isTokenized()) { // un-tokenized field
|
||||
|
@ -99,7 +97,6 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
|
|||
fieldState.offset += valueLength;
|
||||
fieldState.length++;
|
||||
fieldState.position++;
|
||||
anyToken = valueLength > 0;
|
||||
} else { // tokenized field
|
||||
final TokenStream stream;
|
||||
final TokenStream streamValue = field.tokenStreamValue();
|
||||
|
@ -189,14 +186,12 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
|
|||
stream.end();
|
||||
|
||||
fieldState.offset += offsetAttribute.endOffset();
|
||||
anyToken = fieldState.length > startLength;
|
||||
} finally {
|
||||
stream.close();
|
||||
}
|
||||
}
|
||||
|
||||
if (anyToken)
|
||||
fieldState.offset += docState.analyzer.getOffsetGap(field);
|
||||
fieldState.offset += docState.analyzer.getOffsetGap(field);
|
||||
fieldState.boost *= field.getBoost();
|
||||
}
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@ public final class MockAnalyzer extends Analyzer {
|
|||
private final boolean lowerCase;
|
||||
private final CharacterRunAutomaton filter;
|
||||
private final boolean enablePositionIncrements;
|
||||
private int positionIncrementGap;
|
||||
|
||||
/**
|
||||
* Creates a new MockAnalyzer.
|
||||
|
@ -89,4 +90,13 @@ public final class MockAnalyzer extends Analyzer {
|
|||
return saved.filter;
|
||||
}
|
||||
}
|
||||
|
||||
public void setPositionIncrementGap(int positionIncrementGap){
|
||||
this.positionIncrementGap = positionIncrementGap;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getPositionIncrementGap(String fieldName){
|
||||
return positionIncrementGap;
|
||||
}
|
||||
}
|
|
@ -4266,11 +4266,11 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field"));
|
||||
TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0);
|
||||
assertEquals(1, termOffsets.length);
|
||||
assertEquals(0, termOffsets[0].getStartOffset());
|
||||
assertEquals(6, termOffsets[0].getEndOffset());
|
||||
assertEquals(1, termOffsets[0].getStartOffset());
|
||||
assertEquals(7, termOffsets[0].getEndOffset());
|
||||
termOffsets = tpv.getOffsets(1);
|
||||
assertEquals(7, termOffsets[0].getStartOffset());
|
||||
assertEquals(10, termOffsets[0].getEndOffset());
|
||||
assertEquals(8, termOffsets[0].getStartOffset());
|
||||
assertEquals(11, termOffsets[0].getEndOffset());
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
@ -4301,8 +4301,37 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
assertEquals(0, termOffsets[0].getStartOffset());
|
||||
assertEquals(4, termOffsets[0].getEndOffset());
|
||||
termOffsets = tpv.getOffsets(1);
|
||||
assertEquals(5, termOffsets[0].getStartOffset());
|
||||
assertEquals(11, termOffsets[0].getEndOffset());
|
||||
assertEquals(6, termOffsets[0].getStartOffset());
|
||||
assertEquals(12, termOffsets[0].getEndOffset());
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// LUCENE-2529
|
||||
public void testPositionIncrementGapEmptyField() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
MockAnalyzer analyzer = new MockAnalyzer();
|
||||
analyzer.setPositionIncrementGap( 100 );
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
|
||||
TEST_VERSION_CURRENT, analyzer));
|
||||
Document doc = new Document();
|
||||
Field f = newField("field", "", Field.Store.NO,
|
||||
Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS);
|
||||
Field f2 = newField("field", "crunch man", Field.Store.NO,
|
||||
Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS);
|
||||
doc.add(f);
|
||||
doc.add(f2);
|
||||
w.addDocument(doc);
|
||||
w.close();
|
||||
|
||||
IndexReader r = IndexReader.open(dir, true);
|
||||
TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field"));
|
||||
int[] poss = tpv.getTermPositions(0);
|
||||
assertEquals(1, poss.length);
|
||||
assertEquals(100, poss[0]);
|
||||
poss = tpv.getTermPositions(1);
|
||||
assertEquals(1, poss.length);
|
||||
assertEquals(101, poss[0]);
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue