offer additional methods that take analyzer + text instead of tokenstream; fix some unused imports and variables

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@154444 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Daniel Naber 2005-02-19 19:08:52 +00:00
parent cf41b3d1cb
commit 05d0335dcd
2 changed files with 63 additions and 11 deletions

View File

@ -16,9 +16,11 @@ package org.apache.lucene.search.highlight;
*/ */
import java.io.IOException; import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.PriorityQueue;
@ -57,7 +59,23 @@ public class Highlighter
this.fragmentScorer = fragmentScorer; this.fragmentScorer = fragmentScorer;
} }
/**
 * Convenience overload of {@link #getBestFragment(TokenStream, String)} for
 * callers that have an analyzer and raw text rather than a ready-made
 * token stream. The text is wrapped in a {@link StringReader} and tokenized
 * by <code>analyzer</code>; the resulting stream and the same text are then
 * handed to the token-stream variant.
 * <p>
 * Note that the analyzer is always asked for a stream under the fixed field
 * name <code>"field"</code>.
 *
 * @param analyzer the analyzer used to break <code>text</code> into tokens
 * @param text the text in which terms are to be highlighted
 *
 * @return highlighted text fragment or null if no terms found
 * @throws IOException if an I/O error is reported while highlighting
 */
public final String getBestFragment(Analyzer analyzer, String text)
    throws IOException
{
    return getBestFragment(
        analyzer.tokenStream("field", new StringReader(text)),
        text);
}
/** /**
* Highlights chosen terms in a text, extracting the most relevant section. * Highlights chosen terms in a text, extracting the most relevant section.
@ -84,6 +102,29 @@ public class Highlighter
} }
return null; return null;
} }
/**
 * Convenience overload of {@link #getBestFragments(TokenStream, String, int)}
 * for callers that have an analyzer and raw text rather than a ready-made
 * token stream. The text is wrapped in a {@link StringReader} and tokenized
 * by <code>analyzer</code>; the resulting stream, the same text and
 * <code>maxNumFragments</code> are then handed to the token-stream variant.
 * <p>
 * Note that the analyzer is always asked for a stream under the fixed field
 * name <code>"field"</code>.
 *
 * @param analyzer the analyzer used to break <code>text</code> into tokens
 * @param text the text in which terms are to be highlighted
 * @param maxNumFragments the maximum number of fragments.
 *
 * @return highlighted text fragments (between 0 and maxNumFragments number of fragments)
 * @throws IOException if an I/O error is reported while highlighting
 */
public final String[] getBestFragments(Analyzer analyzer, String text, int maxNumFragments)
    throws IOException
{
    return getBestFragments(
        analyzer.tokenStream("field", new StringReader(text)),
        text,
        maxNumFragments);
}
/** /**
* Highlights chosen terms in a text, extracting the most relevant sections. * Highlights chosen terms in a text, extracting the most relevant sections.
* The document text is analysed in chunks to record hit statistics * The document text is analysed in chunks to record hit statistics

View File

@ -17,7 +17,6 @@ package org.apache.lucene.search.highlight;
*/ */
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import java.io.StringReader; import java.io.StringReader;
@ -27,7 +26,6 @@ import java.util.StringTokenizer;
import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import junit.framework.TestCase; import junit.framework.TestCase;
@ -50,7 +48,6 @@ import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.RAMDirectory;
import org.w3c.dom.Element; import org.w3c.dom.Element;
import org.w3c.dom.NodeList; import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/** /**
* JUnit Test for Highlighter class. * JUnit Test for Highlighter class.
@ -157,7 +154,6 @@ public class HighlighterTest extends TestCase implements Formatter
assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 5); assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 5);
} }
public void testGetBestSingleFragment() throws Exception public void testGetBestSingleFragment() throws Exception
{ {
doSearching("Kennedy"); doSearching("Kennedy");
@ -172,6 +168,23 @@ public class HighlighterTest extends TestCase implements Formatter
System.out.println("\t" + result); System.out.println("\t" + result);
} }
assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4); assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
numHighlights = 0;
for (int i = 0; i < hits.length(); i++)
{
String text = hits.doc(i).get(FIELD_NAME);
highlighter.getBestFragment(analyzer, text);
}
assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
numHighlights = 0;
for (int i = 0; i < hits.length(); i++)
{
String text = hits.doc(i).get(FIELD_NAME);
highlighter.getBestFragments(analyzer, text, 10);
}
assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
} }
public void testGetBestSingleFragmentWithWeights() throws Exception public void testGetBestSingleFragmentWithWeights() throws Exception
@ -278,7 +291,7 @@ public class HighlighterTest extends TestCase implements Formatter
TokenStream tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(texts[0])); TokenStream tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(texts[0]));
String result = highlighter.getBestFragment(tokenStream,texts[0]); String result = highlighter.getBestFragment(tokenStream,texts[0]);
assertTrue("Setting MaxDocBytesToAnalyze should have prevented " + assertTrue("Setting MaxDocBytesToAnalyze should have prevented " +
"us from finding matches for this record" + numHighlights + "us from finding matches for this record: " + numHighlights +
" found", numHighlights == 0); " found", numHighlights == 0);
} }
@ -322,7 +335,6 @@ public class HighlighterTest extends TestCase implements Formatter
Highlighter highlighter = Highlighter highlighter =
new Highlighter(this,new QueryScorer(query)); new Highlighter(this,new QueryScorer(query));
int highlightFragmentSizeInBytes = 40;
for (int i = 0; i < texts.length; i++) for (int i = 0; i < texts.length; i++)
{ {
String text = texts[i]; String text = texts[i];
@ -568,8 +580,8 @@ public class HighlighterTest extends TestCase implements Formatter
//========== THESE LOOK LIKE, WITH SOME MORE EFFORT THESE COULD BE //========== THESE LOOK LIKE, WITH SOME MORE EFFORT THESE COULD BE
//========== MADE MORE GENERALLY USEFUL. //========== MADE MORE GENERALLY USEFUL.
// TODO - make synonyms all interchangeable with each other and produce // TODO - make synonyms all interchangeable with each other and produce
// a version that does antonyms(?) - the "is a specialised type of ...." // a version that does hyponyms - the "is a specialised type of ...."
// so that car=audi, bmw and volkswagen but bmw != audi so different // so that car = audi, bmw and volkswagen but bmw != audi so different
// behaviour to synonyms // behaviour to synonyms
//=================================================================== //===================================================================
@ -587,7 +599,6 @@ class SynonymAnalyzer extends Analyzer
*/ */
public TokenStream tokenStream(String arg0, Reader arg1) public TokenStream tokenStream(String arg0, Reader arg1)
{ {
return new SynonymTokenizer(new LowerCaseTokenizer(arg1), synonyms); return new SynonymTokenizer(new LowerCaseTokenizer(arg1), synonyms);
} }
} }