mirror of https://github.com/apache/lucene.git
New low-level API exposed (getBestTextFragments) to provide access to fragment scores.
ToString() implemented on TextFragment objects to return text git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150997 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f920cff71a
commit
726ddaeb5a
|
@ -96,11 +96,8 @@ public class Highlighter
|
|||
throws IOException
|
||||
{
|
||||
maxNumFragments = Math.max(1, maxNumFragments); //sanity check
|
||||
StringBuffer newText = new StringBuffer();
|
||||
|
||||
TextFragment[] frag =getBestDocFragments(tokenStream,text, newText, maxNumFragments);
|
||||
|
||||
mergeContiguousFragments(frag);
|
||||
|
||||
TextFragment[] frag =getBestTextFragments(tokenStream,text, true,maxNumFragments);
|
||||
|
||||
//Get text
|
||||
ArrayList fragTexts = new ArrayList();
|
||||
|
@ -109,33 +106,35 @@ public class Highlighter
|
|||
{
|
||||
if ((frag[i] != null) && (frag[i].getScore() > 0))
|
||||
{
|
||||
fragTexts.add(
|
||||
newText.substring(
|
||||
frag[i].textStartPos,
|
||||
frag[i].textEndPos));
|
||||
fragTexts.add(frag[i].toString());
|
||||
}
|
||||
}
|
||||
return (String[]) fragTexts.toArray(new String[0]);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Low level api to get the most relevant sections of the document
|
||||
* Low level api to get the most relevant (formatted) sections of the document.
|
||||
* This method has been made public to allow visibility of score information held in TextFragment objects.
|
||||
* Thanks to Jason Calabrese for help in redefining the interface.
|
||||
* @param tokenStream
|
||||
* @param text
|
||||
* @param maxNumFragments
|
||||
* @param mergeContiguousFragments
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
private final TextFragment[] getBestDocFragments(
|
||||
public final TextFragment[] getBestTextFragments(
|
||||
TokenStream tokenStream,
|
||||
String text,
|
||||
StringBuffer newText,
|
||||
boolean mergeContiguousFragments,
|
||||
int maxNumFragments)
|
||||
throws IOException
|
||||
{
|
||||
ArrayList docFrags = new ArrayList();
|
||||
StringBuffer newText=new StringBuffer();
|
||||
|
||||
TextFragment currentFrag = new TextFragment(newText.length(), docFrags.size());
|
||||
TextFragment currentFrag = new TextFragment(newText,newText.length(), docFrags.size());
|
||||
fragmentScorer.startFragment(currentFrag);
|
||||
docFrags.add(currentFrag);
|
||||
|
||||
|
@ -175,7 +174,7 @@ public class Highlighter
|
|||
currentFrag.setScore(fragmentScorer.getFragmentScore());
|
||||
//record stats for a new fragment
|
||||
currentFrag.textEndPos = newText.length();
|
||||
currentFrag =new TextFragment(newText.length(), docFrags.size());
|
||||
currentFrag =new TextFragment(newText, newText.length(), docFrags.size());
|
||||
fragmentScorer.startFragment(currentFrag);
|
||||
docFrags.add(currentFrag);
|
||||
}
|
||||
|
@ -243,6 +242,22 @@ public class Highlighter
|
|||
{
|
||||
frag[i] = (TextFragment) fragQueue.pop();
|
||||
}
|
||||
|
||||
//merge any contiguous fragments to improve readability
|
||||
if(mergeContiguousFragments)
|
||||
{
|
||||
mergeContiguousFragments(frag);
|
||||
ArrayList fragTexts = new ArrayList();
|
||||
for (int i = 0; i < frag.length; i++)
|
||||
{
|
||||
if ((frag[i] != null) && (frag[i].getScore() > 0))
|
||||
{
|
||||
fragTexts.add(frag[i]);
|
||||
}
|
||||
}
|
||||
frag= (TextFragment[]) fragTexts.toArray(new TextFragment[0]);
|
||||
}
|
||||
|
||||
return frag;
|
||||
|
||||
}
|
||||
|
|
|
@ -25,13 +25,15 @@ package org.apache.lucene.search.highlight;
|
|||
*/
|
||||
public class TextFragment
|
||||
{
|
||||
StringBuffer markedUpText;
|
||||
int fragNum;
|
||||
int textStartPos;
|
||||
int textEndPos;
|
||||
float score;
|
||||
|
||||
public TextFragment(int textStartPos, int fragNum)
|
||||
public TextFragment(StringBuffer markedUpText,int textStartPos, int fragNum)
|
||||
{
|
||||
this.markedUpText=markedUpText;
|
||||
this.textStartPos = textStartPos;
|
||||
this.fragNum = fragNum;
|
||||
}
|
||||
|
@ -68,4 +70,10 @@ public class TextFragment
|
|||
return fragNum;
|
||||
}
|
||||
|
||||
/* Returns the marked-up text for this text fragment
|
||||
*/
|
||||
public String toString() {
|
||||
return markedUpText.substring(textStartPos, textEndPos);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
|
@ -41,12 +42,6 @@ import org.apache.lucene.search.IndexSearcher;
|
|||
import org.apache.lucene.search.MultiSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Searcher;
|
||||
import org.apache.lucene.search.highlight.Formatter;
|
||||
import org.apache.lucene.search.highlight.Highlighter;
|
||||
import org.apache.lucene.search.highlight.QueryScorer;
|
||||
import org.apache.lucene.search.highlight.SimpleFragmenter;
|
||||
import org.apache.lucene.search.highlight.TokenGroup;
|
||||
import org.apache.lucene.search.highlight.WeightedTerm;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
|
||||
/**
|
||||
|
@ -219,7 +214,6 @@ public class HighlighterTest extends TestCase implements Formatter
|
|||
}
|
||||
|
||||
|
||||
|
||||
public void testGetSimpleHighlight() throws Exception
|
||||
{
|
||||
doSearching("Kennedy");
|
||||
|
@ -237,6 +231,37 @@ public class HighlighterTest extends TestCase implements Formatter
|
|||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
|
||||
}
|
||||
|
||||
|
||||
public void testGetTextFragments() throws Exception
|
||||
{
|
||||
doSearching("Kennedy");
|
||||
Highlighter highlighter =
|
||||
new Highlighter(this,new QueryScorer(query));
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(20));
|
||||
|
||||
for (int i = 0; i < hits.length(); i++)
|
||||
{
|
||||
String text = hits.doc(i).get(FIELD_NAME);
|
||||
TokenStream tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(text));
|
||||
|
||||
String stringResults[] = highlighter.getBestFragments(tokenStream,text,10);
|
||||
|
||||
tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(text));
|
||||
TextFragment fragmentResults[] = highlighter.getBestTextFragments(tokenStream,text,true,10);
|
||||
|
||||
assertTrue("Failed to find correct number of text Fragments: " +
|
||||
fragmentResults.length + " vs "+ stringResults.length, fragmentResults.length==stringResults.length);
|
||||
for (int j = 0; j < stringResults.length; j++)
|
||||
{
|
||||
System.out.println(fragmentResults[j]);
|
||||
assertTrue("Failed to find same text Fragments: " +
|
||||
fragmentResults[j] + " found", fragmentResults[j].toString().equals(stringResults[j]));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public void testMaxSizeHighlight() throws Exception
|
||||
{
|
||||
doSearching("meat");
|
||||
|
|
Loading…
Reference in New Issue