mirror of https://github.com/apache/lucene.git
LUCENE-1132: Updated documentation and deprecated bytes references in favor of character references
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@616305 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
38f8d4e837
commit
b95e9a5154
|
@ -22,7 +22,6 @@ import java.util.ArrayList;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.util.PriorityQueue;
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
|
|
||||||
|
@ -34,9 +33,12 @@ import org.apache.lucene.util.PriorityQueue;
|
||||||
*/
|
*/
|
||||||
public class Highlighter
|
public class Highlighter
|
||||||
{
|
{
|
||||||
|
public static final int DEFAULT_MAX_CHARS_TO_ANALYZE = 50*1024;
|
||||||
public static final int DEFAULT_MAX_DOC_BYTES_TO_ANALYZE=50*1024;
|
/**
|
||||||
private int maxDocBytesToAnalyze=DEFAULT_MAX_DOC_BYTES_TO_ANALYZE;
|
* @deprecated See {@link #DEFAULT_MAX_CHARS_TO_ANALYZE}
|
||||||
|
*/
|
||||||
|
public static final int DEFAULT_MAX_DOC_BYTES_TO_ANALYZE=DEFAULT_MAX_CHARS_TO_ANALYZE;
|
||||||
|
private int maxDocCharsToAnalyze = DEFAULT_MAX_CHARS_TO_ANALYZE;
|
||||||
private Formatter formatter;
|
private Formatter formatter;
|
||||||
private Encoder encoder;
|
private Encoder encoder;
|
||||||
private Fragmenter textFragmenter=new SimpleFragmenter();
|
private Fragmenter textFragmenter=new SimpleFragmenter();
|
||||||
|
@ -224,7 +226,7 @@ public class Highlighter
|
||||||
|
|
||||||
TokenGroup tokenGroup=new TokenGroup();
|
TokenGroup tokenGroup=new TokenGroup();
|
||||||
token = tokenStream.next();
|
token = tokenStream.next();
|
||||||
while ((token!= null)&&(token.startOffset()<maxDocBytesToAnalyze))
|
while ((token!= null)&&(token.startOffset()< maxDocCharsToAnalyze))
|
||||||
{
|
{
|
||||||
if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct(token)))
|
if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct(token)))
|
||||||
{
|
{
|
||||||
|
@ -283,7 +285,7 @@ public class Highlighter
|
||||||
(lastEndOffset < text.length())
|
(lastEndOffset < text.length())
|
||||||
&&
|
&&
|
||||||
// and that text is not too large...
|
// and that text is not too large...
|
||||||
(text.length()<maxDocBytesToAnalyze)
|
(text.length()< maxDocCharsToAnalyze)
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
//append it to the last fragment
|
//append it to the last fragment
|
||||||
|
@ -471,23 +473,35 @@ public class Highlighter
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return the maximum number of bytes to be tokenized per doc
|
* @return the maximum number of bytes to be tokenized per doc
|
||||||
|
*
|
||||||
|
* @deprecated See {@link #getMaxDocCharsToAnalyze()}, since this value has always counted chars, not bytes. Both methods return the same internal value.
|
||||||
*/
|
*/
|
||||||
public int getMaxDocBytesToAnalyze()
|
public int getMaxDocBytesToAnalyze()
|
||||||
{
|
{
|
||||||
return maxDocBytesToAnalyze;
|
return maxDocCharsToAnalyze;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param byteCount the maximum number of bytes to be tokenized per doc
|
* @param byteCount the maximum number of bytes to be tokenized per doc
|
||||||
* (This can improve performance with large documents)
|
* (This can improve performance with large documents)
|
||||||
|
*
|
||||||
|
* @deprecated See {@link #setMaxDocCharsToAnalyze(int)}, since this value has always counted chars
|
||||||
*/
|
*/
|
||||||
public void setMaxDocBytesToAnalyze(int byteCount)
|
public void setMaxDocBytesToAnalyze(int byteCount)
|
||||||
{
|
{
|
||||||
maxDocBytesToAnalyze = byteCount;
|
maxDocCharsToAnalyze = byteCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
public int getMaxDocCharsToAnalyze() {
|
||||||
|
return maxDocCharsToAnalyze;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMaxDocCharsToAnalyze(int maxDocCharsToAnalyze) {
|
||||||
|
this.maxDocCharsToAnalyze = maxDocCharsToAnalyze;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
*/
|
*/
|
||||||
public Fragmenter getTextFragmenter()
|
public Fragmenter getTextFragmenter()
|
||||||
{
|
{
|
||||||
|
|
|
@ -38,7 +38,7 @@ public class SimpleFragmenter implements Fragmenter
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @param fragmentSize size in bytes of each fragment
|
* @param fragmentSize size in number of characters of each fragment
|
||||||
*/
|
*/
|
||||||
public SimpleFragmenter(int fragmentSize)
|
public SimpleFragmenter(int fragmentSize)
|
||||||
{
|
{
|
||||||
|
@ -67,7 +67,7 @@ public class SimpleFragmenter implements Fragmenter
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return size in bytes of each fragment
|
* @return size in number of characters of each fragment
|
||||||
*/
|
*/
|
||||||
public int getFragmentSize()
|
public int getFragmentSize()
|
||||||
{
|
{
|
||||||
|
@ -75,7 +75,7 @@ public class SimpleFragmenter implements Fragmenter
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param size size in bytes of each fragment
|
* @param size size in characters of each fragment
|
||||||
*/
|
*/
|
||||||
public void setFragmentSize(int size)
|
public void setFragmentSize(int size)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue