mirror of https://github.com/apache/lucene.git
LUCENE-1822: BaseFragListBuilder hard-coded 6 char margin is too naive.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1395847 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5d1e9869a2
commit
15b7ffd6bf
|
@ -44,6 +44,11 @@ New Features
|
|||
the suggester to ignore such variations. (Robert Muir, Sudarshan
|
||||
Gaikaiwari, Mike McCandless)
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-1822: BaseFragListBuilder hard-coded 6 char margin is too naive.
|
||||
(Alex Vigdor, Arcadius Ahouansou, Koji Sekiguchi)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-4443: BlockPostingsFormat no longer writes unnecessary offsets
|
||||
|
|
|
@ -69,13 +69,14 @@ public abstract class BaseFragListBuilder implements FragListBuilder {
|
|||
|
||||
wpil.clear();
|
||||
wpil.add( phraseInfo );
|
||||
int firstOffset = phraseInfo.getStartOffset();
|
||||
int st = phraseInfo.getStartOffset() - margin < startOffset ?
|
||||
startOffset : phraseInfo.getStartOffset() - margin;
|
||||
int en = st + fragCharSize;
|
||||
if( phraseInfo.getEndOffset() > en )
|
||||
en = phraseInfo.getEndOffset();
|
||||
startOffset = en;
|
||||
|
||||
int lastEndOffset = phraseInfo.getEndOffset();
|
||||
while( true ){
|
||||
if( ite.hasNext() ){
|
||||
phraseInfo = ite.next();
|
||||
|
@ -84,11 +85,22 @@ public abstract class BaseFragListBuilder implements FragListBuilder {
|
|||
}
|
||||
else
|
||||
break;
|
||||
if( phraseInfo.getEndOffset() <= en )
|
||||
if( phraseInfo.getEndOffset() <= en ){
|
||||
wpil.add( phraseInfo );
|
||||
lastEndOffset = phraseInfo.getEndOffset();
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
int matchLen = lastEndOffset - firstOffset;
|
||||
//now recalculate the start and end position to "center" the result
|
||||
int newMargin = (fragCharSize-matchLen)/2;
|
||||
st = firstOffset - newMargin;
|
||||
if(st<startOffset){
|
||||
st = startOffset;
|
||||
}
|
||||
en = st+fragCharSize;
|
||||
startOffset = en;
|
||||
fieldFragList.add( st, en, wpil );
|
||||
}
|
||||
return fieldFragList;
|
||||
|
|
|
@ -42,7 +42,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
|||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "abcdefghijklmnopqrs")), "abcdefghijklmnopqrs" ), sflb.minFragCharSize );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,18)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
public void testSmallerFragSizeThanPhraseQuery() throws Exception {
|
||||
|
@ -55,7 +55,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
|||
FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "abcdefgh jklmnopqrs" ), sflb.minFragCharSize );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
if (VERBOSE) System.out.println( ffl.getFragInfos().get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(1,19)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
public void test1TermIndex() throws Exception {
|
||||
|
@ -77,7 +77,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
|||
|
||||
ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "b b b b a b b b b a" ), 20 );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(2,22)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(a((8,9))a((18,19)))/2.0(4,24)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
public void test2TermsIndex2Frags() throws Exception {
|
||||
|
@ -85,7 +85,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
|||
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b b b a" ), 20 );
|
||||
assertEquals( 2, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(a((0,1)))/1.0(0,20)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(a((28,29)))/1.0(22,42)", ffl.getFragInfos().get( 1 ).toString() );
|
||||
assertEquals( "subInfos=(a((28,29)))/1.0(20,40)", ffl.getFragInfos().get( 1 ).toString() );
|
||||
|
||||
ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "a")), "a b b b b b b b b b b b b a" ), 20 );
|
||||
assertEquals( 2, ffl.getFragInfos().size() );
|
||||
|
@ -164,7 +164,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
|||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(d((9,10)))/1.0(0,100)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
public void test1PhraseLongMV() throws Exception {
|
||||
|
@ -176,7 +176,7 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
|||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(96,196)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(searchengines((102,116))searchengines((157,171)))/2.0(87,187)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
public void test1PhraseLongMVB() throws Exception {
|
||||
|
@ -188,6 +188,6 @@ public class SimpleFragListBuilderTest extends AbstractTestCase {
|
|||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(sppeeeed((88,93)))/1.0(82,182)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(sppeeeed((88,93)))/1.0(41,141)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
}
|
||||
|
|
|
@ -106,7 +106,8 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
|||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
|
||||
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
||||
assertEquals( "a b c <b>d</b> e", sfb.createFragment( reader, 0, F, ffl ) );
|
||||
// Should we probably be trimming?
|
||||
assertEquals( " a b c <b>d</b> e", sfb.createFragment( reader, 0, F, ffl ) );
|
||||
}
|
||||
|
||||
public void test1PhraseLongMV() throws Exception {
|
||||
|
@ -118,7 +119,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
|||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
|
||||
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
||||
assertEquals( "The most <b>search engines</b> use only one of these methods. Even the <b>search engines</b> that says they can use the",
|
||||
assertEquals( "customization: The most <b>search engines</b> use only one of these methods. Even the <b>search engines</b> that says they can",
|
||||
sfb.createFragment( reader, 0, F, ffl ) );
|
||||
}
|
||||
|
||||
|
@ -131,7 +132,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
|||
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
|
||||
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
|
||||
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
|
||||
assertEquals( "processing <b>speed</b>, the", sfb.createFragment( reader, 0, F, ffl ) );
|
||||
assertEquals( "additional hardware. \nWhen you talk about processing <b>speed</b>, the", sfb.createFragment( reader, 0, F, ffl ) );
|
||||
}
|
||||
|
||||
public void testUnstoredField() throws Exception {
|
||||
|
@ -209,7 +210,7 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
|
|||
String[] result = sfb.createFragments(reader, 0, F, ffl, 3);
|
||||
assertEquals(2, result.length);
|
||||
assertEquals("some <b>text</b> to highlight", result[0]);
|
||||
assertEquals("other <b>text</b>", result[1]);
|
||||
assertEquals("highlight other <b>text</b>", result[1]);
|
||||
|
||||
fq = new FieldQuery( tq( "highlight" ), true, true );
|
||||
stack = new FieldTermStack( reader, 0, F, fq );
|
||||
|
|
|
@ -29,7 +29,7 @@ public class WeightedFragListBuilderTest extends AbstractTestCase {
|
|||
WeightedFragListBuilder wflb = new WeightedFragListBuilder();
|
||||
FieldFragList ffl = wflb.createFieldFragList( fpl, 100 );
|
||||
assertEquals( 1, ffl.getFragInfos().size() );
|
||||
assertEquals( "subInfos=(theboth((195,203)))/0.86791086(189,289)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
assertEquals( "subInfos=(theboth((195,203)))/0.86791086(149,249)", ffl.getFragInfos().get( 0 ).toString() );
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue