offer additional methods that take analyzer + text instead of tokenstream; fix some unused imports and variables

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@154444 13f79535-47bb-0310-9956-ffa450edef68
2005-02-19 19:08:52 +00:00 · 2005-02-19 19:08:52 +00:00 · 05d0335dcd
parent cf41b3d1cb
commit 05d0335dcd
2 changed files with 63 additions and 11 deletions
--- a/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
+++ b/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
@ -16,9 +16,11 @@ package org.apache.lucene.search.highlight;
 */

 import java.io.IOException;
+import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.Iterator;

+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.util.PriorityQueue;

@ -57,8 +59,24 @@ public class Highlighter
 		this.fragmentScorer = fragmentScorer;
 	}

-
-
+	/**
+	 * Highlights chosen terms in a text, extracting the most relevant section.
+	 * This is a convenience method that calls
+	 * {@link #getBestFragment(TokenStream, String)}
+	 *
+	 * @param analyzer   the analyzer that will be used to split <code>text</code>
+	 * into chunks  
+	 * @param text text to highlight terms in
+	 *
+	 * @return highlighted text fragment or null if no terms found
+	 */
+	public final String getBestFragment(Analyzer analyzer, String text)
+		throws IOException
+	{
+		TokenStream tokenStream = analyzer.tokenStream("field", new StringReader(text));
+		return getBestFragment(tokenStream, text);
+	}
+  
 	/**
 	 * Highlights chosen terms in a text, extracting the most relevant section.
 	 * The document text is analysed in chunks to record hit statistics
@ -84,6 +102,29 @@ public class Highlighter
 		}
 		return null;
 	}
+
+	/**
+	 * Highlights chosen terms in a text, extracting the most relevant sections.
+	 * This is a convenience method that calls
+	 * {@link #getBestFragments(TokenStream, String, int)}
+	 *
+	 * @param analyzer   the analyzer that will be used to split <code>text</code>
+	 * into chunks  
+	 * @param text        	text to highlight terms in
+	 * @param maxNumFragments  the maximum number of fragments.
+	 *
+	 * @return highlighted text fragments (between 0 and maxNumFragments number of fragments)
+	 */
+	public final String[] getBestFragments(
+		Analyzer analyzer,	
+		String text,
+		int maxNumFragments)
+		throws IOException
+	{
+		TokenStream tokenStream = analyzer.tokenStream("field", new StringReader(text));
+		return getBestFragments(tokenStream, text, maxNumFragments);
+	}
+	
 	/**
 	 * Highlights chosen terms in a text, extracting the most relevant sections.
 	 * The document text is analysed in chunks to record hit statistics
--- a/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
+++ b/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
@ -17,7 +17,6 @@ package org.apache.lucene.search.highlight;
 */

 import java.io.ByteArrayInputStream;
-import java.io.File;
 import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
@ -27,7 +26,6 @@ import java.util.StringTokenizer;

 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;

 import junit.framework.TestCase;

@ -50,7 +48,6 @@ import org.apache.lucene.search.Searcher;
 import org.apache.lucene.store.RAMDirectory;
 import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;
-import org.xml.sax.SAXException;

 /**
 * JUnit Test for Highlighter class.
@ -157,7 +154,6 @@ public class HighlighterTest extends TestCase implements Formatter
 		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 5);
 	}

-
 	public void testGetBestSingleFragment() throws Exception
 	{
 		doSearching("Kennedy");
@ -172,6 +168,23 @@ public class HighlighterTest extends TestCase implements Formatter
 			System.out.println("\t" + result);
 		}
 		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
+
+		numHighlights = 0;
+		for (int i = 0; i < hits.length(); i++)
+		{
+    		String text = hits.doc(i).get(FIELD_NAME);
+    		highlighter.getBestFragment(analyzer, text);
+		}
+		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
+
+		numHighlights = 0;
+		for (int i = 0; i < hits.length(); i++)
+		{
+    		String text = hits.doc(i).get(FIELD_NAME);
+    		highlighter.getBestFragments(analyzer, text, 10);
+		}
+		assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
+
 	}

 	public void testGetBestSingleFragmentWithWeights() throws Exception
@ -278,7 +291,7 @@ public class HighlighterTest extends TestCase implements Formatter
 		TokenStream tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(texts[0]));
 		String result = highlighter.getBestFragment(tokenStream,texts[0]);
 		assertTrue("Setting MaxDocBytesToAnalyze should have prevented " +
-			"us from finding matches for this record" + numHighlights +
+			"us from finding matches for this record: " + numHighlights +
 			 " found", numHighlights == 0);
 	}

@ -322,7 +335,6 @@ public class HighlighterTest extends TestCase implements Formatter
 		Highlighter highlighter =
 			new Highlighter(this,new QueryScorer(query));

-		int highlightFragmentSizeInBytes = 40;
 		for (int i = 0; i < texts.length; i++)
 		{
 			String text = texts[i];
@ -568,8 +580,8 @@ public class HighlighterTest extends TestCase implements Formatter
 //========== THESE LOOK LIKE, WITH SOME MORE EFFORT THESE COULD BE
 //========== MADE MORE GENERALLY USEFUL.
 // TODO - make synonyms all interchangeable with each other and produce
-// a version that does antonyms(?) - the "is a specialised type of ...."
-// so that car=audi, bmw and volkswagen but bmw != audi so different
+// a version that does hyponyms - the "is a specialised type of ...."
+// so that car = audi, bmw and volkswagen but bmw != audi so different
 // behaviour to synonyms
 //===================================================================

@ -587,7 +599,6 @@ class SynonymAnalyzer extends Analyzer
 	 */
 	public TokenStream tokenStream(String arg0, Reader arg1)
 	{
-
 		return new SynonymTokenizer(new LowerCaseTokenizer(arg1), synonyms);
 	}
 }