mirror of https://github.com/apache/lucene.git
Offer additional methods that take an Analyzer and text instead of a TokenStream; remove some unused imports and variables
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@154444 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
cf41b3d1cb
commit
05d0335dcd
|
@ -16,9 +16,11 @@ package org.apache.lucene.search.highlight;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.StringReader;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.util.PriorityQueue;
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
|
|
||||||
|
@ -57,7 +59,23 @@ public class Highlighter
|
||||||
this.fragmentScorer = fragmentScorer;
|
this.fragmentScorer = fragmentScorer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Highlights chosen terms in a text, extracting the most relevant section.
|
||||||
|
* This is a convenience method that calls
|
||||||
|
* {@link #getBestFragment(TokenStream, String)}
|
||||||
|
*
|
||||||
|
* @param analyzer the analyzer that will be used to split <code>text</code>
|
||||||
|
* into chunks
|
||||||
|
* @param text text to highlight terms in
|
||||||
|
*
|
||||||
|
* @return highlighted text fragment or null if no terms found
|
||||||
|
*/
|
||||||
|
public final String getBestFragment(Analyzer analyzer, String text)
|
||||||
|
throws IOException
|
||||||
|
{
|
||||||
|
TokenStream tokenStream = analyzer.tokenStream("field", new StringReader(text));
|
||||||
|
return getBestFragment(tokenStream, text);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Highlights chosen terms in a text, extracting the most relevant section.
|
* Highlights chosen terms in a text, extracting the most relevant section.
|
||||||
|
@ -84,6 +102,29 @@ public class Highlighter
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Highlights chosen terms in a text, extracting the most relevant sections.
|
||||||
|
* This is a convenience method that calls
|
||||||
|
* {@link #getBestFragments(TokenStream, String, int)}
|
||||||
|
*
|
||||||
|
* @param analyzer the analyzer that will be used to split <code>text</code>
|
||||||
|
* into chunks
|
||||||
|
* @param text text to highlight terms in
|
||||||
|
* @param maxNumFragments the maximum number of fragments.
|
||||||
|
*
|
||||||
|
* @return highlighted text fragments (between 0 and maxNumFragments number of fragments)
|
||||||
|
*/
|
||||||
|
public final String[] getBestFragments(
|
||||||
|
Analyzer analyzer,
|
||||||
|
String text,
|
||||||
|
int maxNumFragments)
|
||||||
|
throws IOException
|
||||||
|
{
|
||||||
|
TokenStream tokenStream = analyzer.tokenStream("field", new StringReader(text));
|
||||||
|
return getBestFragments(tokenStream, text, maxNumFragments);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Highlights chosen terms in a text, extracting the most relevant sections.
|
* Highlights chosen terms in a text, extracting the most relevant sections.
|
||||||
* The document text is analysed in chunks to record hit statistics
|
* The document text is analysed in chunks to record hit statistics
|
||||||
|
|
|
@ -17,7 +17,6 @@ package org.apache.lucene.search.highlight;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
|
@ -27,7 +26,6 @@ import java.util.StringTokenizer;
|
||||||
|
|
||||||
import javax.xml.parsers.DocumentBuilder;
|
import javax.xml.parsers.DocumentBuilder;
|
||||||
import javax.xml.parsers.DocumentBuilderFactory;
|
import javax.xml.parsers.DocumentBuilderFactory;
|
||||||
import javax.xml.parsers.ParserConfigurationException;
|
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
@ -50,7 +48,6 @@ import org.apache.lucene.search.Searcher;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
import org.w3c.dom.Element;
|
import org.w3c.dom.Element;
|
||||||
import org.w3c.dom.NodeList;
|
import org.w3c.dom.NodeList;
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* JUnit Test for Highlighter class.
|
* JUnit Test for Highlighter class.
|
||||||
|
@ -157,7 +154,6 @@ public class HighlighterTest extends TestCase implements Formatter
|
||||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 5);
|
assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 5);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void testGetBestSingleFragment() throws Exception
|
public void testGetBestSingleFragment() throws Exception
|
||||||
{
|
{
|
||||||
doSearching("Kennedy");
|
doSearching("Kennedy");
|
||||||
|
@ -172,6 +168,23 @@ public class HighlighterTest extends TestCase implements Formatter
|
||||||
System.out.println("\t" + result);
|
System.out.println("\t" + result);
|
||||||
}
|
}
|
||||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
|
assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
|
||||||
|
|
||||||
|
numHighlights = 0;
|
||||||
|
for (int i = 0; i < hits.length(); i++)
|
||||||
|
{
|
||||||
|
String text = hits.doc(i).get(FIELD_NAME);
|
||||||
|
highlighter.getBestFragment(analyzer, text);
|
||||||
|
}
|
||||||
|
assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
|
||||||
|
|
||||||
|
numHighlights = 0;
|
||||||
|
for (int i = 0; i < hits.length(); i++)
|
||||||
|
{
|
||||||
|
String text = hits.doc(i).get(FIELD_NAME);
|
||||||
|
highlighter.getBestFragments(analyzer, text, 10);
|
||||||
|
}
|
||||||
|
assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testGetBestSingleFragmentWithWeights() throws Exception
|
public void testGetBestSingleFragmentWithWeights() throws Exception
|
||||||
|
@ -278,7 +291,7 @@ public class HighlighterTest extends TestCase implements Formatter
|
||||||
TokenStream tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(texts[0]));
|
TokenStream tokenStream=analyzer.tokenStream(FIELD_NAME,new StringReader(texts[0]));
|
||||||
String result = highlighter.getBestFragment(tokenStream,texts[0]);
|
String result = highlighter.getBestFragment(tokenStream,texts[0]);
|
||||||
assertTrue("Setting MaxDocBytesToAnalyze should have prevented " +
|
assertTrue("Setting MaxDocBytesToAnalyze should have prevented " +
|
||||||
"us from finding matches for this record" + numHighlights +
|
"us from finding matches for this record: " + numHighlights +
|
||||||
" found", numHighlights == 0);
|
" found", numHighlights == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -322,7 +335,6 @@ public class HighlighterTest extends TestCase implements Formatter
|
||||||
Highlighter highlighter =
|
Highlighter highlighter =
|
||||||
new Highlighter(this,new QueryScorer(query));
|
new Highlighter(this,new QueryScorer(query));
|
||||||
|
|
||||||
int highlightFragmentSizeInBytes = 40;
|
|
||||||
for (int i = 0; i < texts.length; i++)
|
for (int i = 0; i < texts.length; i++)
|
||||||
{
|
{
|
||||||
String text = texts[i];
|
String text = texts[i];
|
||||||
|
@ -568,8 +580,8 @@ public class HighlighterTest extends TestCase implements Formatter
|
||||||
//========== THESE LOOK LIKE, WITH SOME MORE EFFORT THESE COULD BE
|
//========== THESE LOOK LIKE, WITH SOME MORE EFFORT THESE COULD BE
|
||||||
//========== MADE MORE GENERALLY USEFUL.
|
//========== MADE MORE GENERALLY USEFUL.
|
||||||
// TODO - make synonyms all interchangeable with each other and produce
|
// TODO - make synonyms all interchangeable with each other and produce
|
||||||
// a version that does antonyms(?) - the "is a specialised type of ...."
|
// a version that does hyponyms - the "is a specialised type of ...."
|
||||||
// so that car=audi, bmw and volkswagen but bmw != audi so different
|
// so that car = audi, bmw and volkswagen but bmw != audi so different
|
||||||
// behaviour to synonyms
|
// behaviour to synonyms
|
||||||
//===================================================================
|
//===================================================================
|
||||||
|
|
||||||
|
@ -587,7 +599,6 @@ class SynonymAnalyzer extends Analyzer
|
||||||
*/
|
*/
|
||||||
public TokenStream tokenStream(String arg0, Reader arg1)
|
public TokenStream tokenStream(String arg0, Reader arg1)
|
||||||
{
|
{
|
||||||
|
|
||||||
return new SynonymTokenizer(new LowerCaseTokenizer(arg1), synonyms);
|
return new SynonymTokenizer(new LowerCaseTokenizer(arg1), synonyms);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue