LUCENE-1389: SimpleSpanFragmenter can create very short fragments

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@713569 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Robert Miller 2008-11-13 00:01:43 +00:00
parent 4646692022
commit c2f7d554a3
2 changed files with 47 additions and 5 deletions

View File

@ -33,6 +33,7 @@ public class SimpleSpanFragmenter implements Fragmenter {
private int position = -1;
private SpanScorer spanScorer;
private int waitForPos = -1;
private int textSize;
/**
* @param spanscorer SpanScorer that was used to score hits
@ -70,14 +71,14 @@ public class SimpleSpanFragmenter implements Fragmenter {
for (int i = 0; i < positionSpans.size(); i++) {
if (((PositionSpan) positionSpans.get(i)).start == position) {
waitForPos = ((PositionSpan) positionSpans.get(i)).end + 1;
return true;
break;
}
}
}
boolean isNewFrag = token.endOffset() >= (fragmentSize * currentNumFrags);
boolean isNewFrag = token.endOffset() >= (fragmentSize * currentNumFrags)
&& (textSize - token.endOffset()) >= (fragmentSize >>> 1);
if (isNewFrag) {
currentNumFrags++;
}
@ -89,7 +90,8 @@ public class SimpleSpanFragmenter implements Fragmenter {
* @see org.apache.lucene.search.highlight.Fragmenter#start(java.lang.String)
*/
public void start(String originalText) {
// NOTE(review): this listing looks like a diff rendered without +/- markers, so
// the next two assignments are presumably the removed line followed by its
// replacement; as written only the second value survives. Confirm against the commit.
position = 0;
// -1 is the same value the position field is declared with.
position = -1;
// Restart the fragment counter that the new-fragment check multiplies by fragmentSize.
currentNumFrags = 1;
// Remember the text length: the new-fragment check compares the distance between a
// token's end offset and this length against half of fragmentSize.
textSize = originalText.length();
}
}

View File

@ -242,6 +242,46 @@ public class HighlighterTest extends TestCase implements Formatter {
}
}
/**
 * Runs two phrase searches and, for every hit, prints the best fragments
 * produced by a {@link Highlighter} configured with a
 * {@link SimpleSpanFragmenter}.
 *
 * NOTE: this test only prints the highlighted text; it makes no assertions,
 * so it fails only if an exception is thrown.
 */
public void testSimpleSpanFragmenter() throws Exception {
  doSearching("\"piece of text that is very long\"");
  printBestSpanFragments(5, 2);

  doSearching("\"been shot\"");
  printBestSpanFragments(20, 2);
}

/**
 * Highlights the FIELD_NAME text of every current hit using a
 * SimpleSpanFragmenter and prints each result prefixed with a tab.
 *
 * @param fragmenterArg second constructor argument of SimpleSpanFragmenter
 *        (presumably the fragment size - confirm against its constructor)
 * @param maxNumFragmentsRequired maximum number of fragments requested from
 *        Highlighter#getBestFragments
 */
private void printBestSpanFragments(int fragmenterArg, int maxNumFragmentsRequired)
    throws Exception {
  for (int i = 0; i < hits.length(); i++) {
    String text = hits.doc(i).get(FIELD_NAME);
    CachingTokenFilter tokenStream = new CachingTokenFilter(
        analyzer.tokenStream(FIELD_NAME, new StringReader(text)));
    SpanScorer spanscorer = new SpanScorer(query, FIELD_NAME, tokenStream);

    Highlighter highlighter = new Highlighter(this, spanscorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(spanscorer, fragmenterArg));

    // The same cached stream was handed to SpanScorer above; reset it before
    // the highlighter reads it (presumably the scorer consumed it - confirm).
    tokenStream.reset();

    String result = highlighter.getBestFragments(tokenStream, text,
        maxNumFragmentsRequired, "...");
    System.out.println("\t" + result);
  }
}
// position sensitive query added after position insensitive query
public void testPosTermStdTerm() throws Exception {
doSearching("y \"x y z\"");