mirror of https://github.com/apache/lucene.git
LUCENE-1389: SimpleSpanFragmenter can create very short fragments
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@713569 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4646692022
commit
c2f7d554a3
|
@ -33,6 +33,7 @@ public class SimpleSpanFragmenter implements Fragmenter {
|
|||
private int position = -1;
|
||||
private SpanScorer spanScorer;
|
||||
private int waitForPos = -1;
|
||||
private int textSize;
|
||||
|
||||
/**
|
||||
* @param spanscorer SpanScorer that was used to score hits
|
||||
|
@ -70,14 +71,14 @@ public class SimpleSpanFragmenter implements Fragmenter {
|
|||
for (int i = 0; i < positionSpans.size(); i++) {
|
||||
if (((PositionSpan) positionSpans.get(i)).start == position) {
|
||||
waitForPos = ((PositionSpan) positionSpans.get(i)).end + 1;
|
||||
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
boolean isNewFrag = token.endOffset() >= (fragmentSize * currentNumFrags);
|
||||
|
||||
boolean isNewFrag = token.endOffset() >= (fragmentSize * currentNumFrags)
|
||||
&& (textSize - token.endOffset()) >= (fragmentSize >>> 1);
|
||||
|
||||
if (isNewFrag) {
|
||||
currentNumFrags++;
|
||||
}
|
||||
|
@ -89,7 +90,8 @@ public class SimpleSpanFragmenter implements Fragmenter {
|
|||
* @see org.apache.lucene.search.highlight.Fragmenter#start(java.lang.String)
|
||||
*/
|
||||
public void start(String originalText) {
|
||||
position = 0;
|
||||
position = -1;
|
||||
currentNumFrags = 1;
|
||||
textSize = originalText.length();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -242,6 +242,46 @@ public class HighlighterTest extends TestCase implements Formatter {
|
|||
}
|
||||
}
|
||||
|
||||
public void testSimpleSpanFragmenter() throws Exception {
|
||||
doSearching("\"piece of text that is very long\"");
|
||||
|
||||
int maxNumFragmentsRequired = 2;
|
||||
|
||||
for (int i = 0; i < hits.length(); i++) {
|
||||
String text = hits.doc(i).get(FIELD_NAME);
|
||||
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer
|
||||
.tokenStream(FIELD_NAME, new StringReader(text)));
|
||||
SpanScorer spanscorer = new SpanScorer(query, FIELD_NAME, tokenStream);
|
||||
Highlighter highlighter = new Highlighter(this, spanscorer);
|
||||
highlighter.setTextFragmenter(new SimpleSpanFragmenter(spanscorer, 5));
|
||||
tokenStream.reset();
|
||||
|
||||
String result = highlighter.getBestFragments(tokenStream, text,
|
||||
maxNumFragmentsRequired, "...");
|
||||
System.out.println("\t" + result);
|
||||
|
||||
}
|
||||
|
||||
doSearching("\"been shot\"");
|
||||
|
||||
maxNumFragmentsRequired = 2;
|
||||
|
||||
for (int i = 0; i < hits.length(); i++) {
|
||||
String text = hits.doc(i).get(FIELD_NAME);
|
||||
CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer
|
||||
.tokenStream(FIELD_NAME, new StringReader(text)));
|
||||
SpanScorer spanscorer = new SpanScorer(query, FIELD_NAME, tokenStream);
|
||||
Highlighter highlighter = new Highlighter(this, spanscorer);
|
||||
highlighter.setTextFragmenter(new SimpleSpanFragmenter(spanscorer, 20));
|
||||
tokenStream.reset();
|
||||
|
||||
String result = highlighter.getBestFragments(tokenStream, text,
|
||||
maxNumFragmentsRequired, "...");
|
||||
System.out.println("\t" + result);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// position sensitive query added after position insensitive query
|
||||
public void testPosTermStdTerm() throws Exception {
|
||||
doSearching("y \"x y z\"");
|
||||
|
|
Loading…
Reference in New Issue