LUCENE-1822: BaseFragListBuilder hard-coded 6 char margin is too naive.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1395847 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Koji Sekiguchi 2012-10-09 02:38:53 +00:00
parent 5d1e9869a2
commit 15b7ffd6bf
5 changed files with 32 additions and 14 deletions

View File

@ -44,6 +44,11 @@ New Features
the suggester to ignore such variations. (Robert Muir, Sudarshan
Gaikaiwari, Mike McCandless)
Bug Fixes
* LUCENE-1822: BaseFragListBuilder hard-coded 6 char margin is too naive.
(Alex Vigdor, Arcadius Ahouansou, Koji Sekiguchi)
Optimizations
* LUCENE-4443: BlockPostingsFormat no longer writes unnecessary offsets

View File

@ -69,13 +69,14 @@ public abstract class BaseFragListBuilder implements FragListBuilder {
wpil.clear();
wpil.add( phraseInfo );
int firstOffset = phraseInfo.getStartOffset();
int st = phraseInfo.getStartOffset() - margin < startOffset ?
startOffset : phraseInfo.getStartOffset() - margin;
int en = st + fragCharSize;
if( phraseInfo.getEndOffset() > en )
en = phraseInfo.getEndOffset();
startOffset = en;
int lastEndOffset = phraseInfo.getEndOffset();
while( true ){
if( ite.hasNext() ){
phraseInfo = ite.next();
@ -84,11 +85,22 @@ public abstract class BaseFragListBuilder implements FragListBuilder {
}
else
break;
if( phraseInfo.getEndOffset() <= en )
if( phraseInfo.getEndOffset() <= en ){
wpil.add( phraseInfo );
lastEndOffset = phraseInfo.getEndOffset();
}
else
break;
}
int matchLen = lastEndOffset - firstOffset;
//now recalculate the start and end position to "center" the result
int newMargin = (fragCharSize-matchLen)/2;
st = firstOffset - newMargin;
if(st<startOffset){
st = startOffset;
}
en = st+fragCharSize;
startOffset = en;
fieldFragList.add( st, en, wpil );
}
return fieldFragList;

View File

@ -42,7 +42,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "abcdefghijklmnopqrs")), "abcdefghijklmnopqrs" ), sflb.minFragCharSize );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.getFragInfos().get( 0 ).toString() );
assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,18)", ffl.getFragInfos().get( 0 ).toString() );
}
public void testSmallerFragSizeThanPhraseQuery() throws Exception {
@ -55,7 +55,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "abcdefgh jklmnopqrs" ), sflb.minFragCharSize );
assertEquals( 1, ffl.getFragInfos().size() );
if (VERBOSE) System.out.println( ffl.getFragInfos().get( 0 ).toString() );
assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.getFragInfos().get( 0 ).toString() );
assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(1,19)", ffl.getFragInfos().get( 0 ).toString() );
}
public void test1TermIndex() throws Exception {
@ -77,7 +77,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "b b b b a b b b b a" ), 20 );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(2,22)", ffl.getFragInfos().get( 0 ).toString() );
assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(4,24)", ffl.getFragInfos().get( 0 ).toString() );
}
public void test2TermsIndex2Frags() throws Exception {
@ -85,7 +85,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b b b a" ), 20 );
assertEquals( 2, ffl.getFragInfos().size() );
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
assertEquals( "subInfos=(a((28,29)))/1.0(22,42)", ffl.getFragInfos().get( 1 ).toString() );
assertEquals( "subInfos=(a((28,29)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b b a" ), 20 );
assertEquals( 2, ffl.getFragInfos().size() );
@ -164,7 +164,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.getFragInfos().get( 0 ).toString() );
assertEquals( "subInfos=(d((9,10)))/1.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
}
public void test1PhraseLongMV() throws Exception {
@ -176,7 +176,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(96,196)", ffl.getFragInfos().get( 0 ).toString() );
assertEquals( "subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(87,187)", ffl.getFragInfos().get( 0 ).toString() );
}
public void test1PhraseLongMVB() throws Exception {
@ -188,6 +188,6 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(sppeeeed((88,93)))/1.0(82,182)", ffl.getFragInfos().get( 0 ).toString() );
assertEquals( "subInfos=(sppeeeed((88,93)))/1.0(41,141)", ffl.getFragInfos().get( 0 ).toString() );
}
}

View File

@ -106,7 +106,8 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
assertEquals( "a b c <b>d</b> e", sfb.createFragment( reader, 0, F, ffl ) );
// Should we probably be trimming?
assertEquals( " a b c <b>d</b> e", sfb.createFragment( reader, 0, F, ffl ) );
}
public void test1PhraseLongMV() throws Exception {
@ -118,7 +119,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
assertEquals( "The most <b>search engines</b> use only one of these methods. Even the <b>search engines</b> that says they can use the",
assertEquals( "customization: The most <b>search engines</b> use only one of these methods. Even the <b>search engines</b> that says they can",
sfb.createFragment( reader, 0, F, ffl ) );
}
@ -131,7 +132,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
assertEquals( "processing <b>speed</b>, the", sfb.createFragment( reader, 0, F, ffl ) );
assertEquals( "additional hardware. \nWhen you talk about processing <b>speed</b>, the", sfb.createFragment( reader, 0, F, ffl ) );
}
public void testUnstoredField() throws Exception {
@ -209,7 +210,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
String[] result = sfb.createFragments(reader, 0, F, ffl, 3);
assertEquals(2, result.length);
assertEquals("some <b>text</b> to highlight", result[0]);
assertEquals("other <b>text</b>", result[1]);
assertEquals("highlight other <b>text</b>", result[1]);
fq = new FieldQuery( tq( "highlight" ), true, true );
stack = new FieldTermStack( reader, 0, F, fq );

View File

@ -29,7 +29,7 @@ public class WeightedFragListBuilderTest extends AbstractTestCase {
WeightedFragListBuilder wflb = new WeightedFragListBuilder();
FieldFragList ffl = wflb.createFieldFragList( fpl, 100 );
assertEquals( 1, ffl.getFragInfos().size() );
assertEquals( "subInfos=(theboth((195,203)))/0.86791086(189,289)", ffl.getFragInfos().get( 0 ).toString() );
assertEquals( "subInfos=(theboth((195,203)))/0.86791086(149,249)", ffl.getFragInfos().get( 0 ).toString() );
}
}