LUCENE-1132: Updated documentation and deprecated bytes references in favor of character references

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@616305 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2008-01-29 13:35:13 +00:00
parent 38f8d4e837
commit b95e9a5154
2 changed files with 27 additions and 13 deletions

View File

@ -22,7 +22,6 @@ import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.PriorityQueue;
@ -34,9 +33,12 @@ import org.apache.lucene.util.PriorityQueue;
*/ */
public class Highlighter public class Highlighter
{ {
public static final int DEFAULT_MAX_CHARS_TO_ANALYZE = 50*1024;
public static final int DEFAULT_MAX_DOC_BYTES_TO_ANALYZE=50*1024; /**
private int maxDocBytesToAnalyze=DEFAULT_MAX_DOC_BYTES_TO_ANALYZE; * @deprecated See {@link #DEFAULT_MAX_CHARS_TO_ANALYZE}
*/
public static final int DEFAULT_MAX_DOC_BYTES_TO_ANALYZE=DEFAULT_MAX_CHARS_TO_ANALYZE;
private int maxDocCharsToAnalyze = DEFAULT_MAX_CHARS_TO_ANALYZE;
private Formatter formatter; private Formatter formatter;
private Encoder encoder; private Encoder encoder;
private Fragmenter textFragmenter=new SimpleFragmenter(); private Fragmenter textFragmenter=new SimpleFragmenter();
@ -224,7 +226,7 @@ public class Highlighter
TokenGroup tokenGroup=new TokenGroup(); TokenGroup tokenGroup=new TokenGroup();
token = tokenStream.next(); token = tokenStream.next();
while ((token!= null)&&(token.startOffset()<maxDocBytesToAnalyze)) while ((token!= null)&&(token.startOffset()< maxDocCharsToAnalyze))
{ {
if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct(token))) if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct(token)))
{ {
@ -283,7 +285,7 @@ public class Highlighter
(lastEndOffset < text.length()) (lastEndOffset < text.length())
&& &&
// and that text is not too large... // and that text is not too large...
(text.length()<maxDocBytesToAnalyze) (text.length()< maxDocCharsToAnalyze)
) )
{ {
//append it to the last fragment //append it to the last fragment
@ -471,23 +473,35 @@ public class Highlighter
} }
/** /**
* @return the maximum number of bytes to be tokenized per doc * @return the maximum number of bytes to be tokenized per doc
*
* @deprecated See {@link #getMaxDocCharsToAnalyze()}, since this value has always counted on chars. They both set the same internal value, however
*/ */
public int getMaxDocBytesToAnalyze() public int getMaxDocBytesToAnalyze()
{ {
return maxDocBytesToAnalyze; return maxDocCharsToAnalyze;
} }
/** /**
* @param byteCount the maximum number of bytes to be tokenized per doc * @param byteCount the maximum number of bytes to be tokenized per doc
* (This can improve performance with large documents) * (This can improve performance with large documents)
*
* @deprecated See {@link #setMaxDocCharsToAnalyze(int)}, since this value has always counted chars
*/ */
public void setMaxDocBytesToAnalyze(int byteCount) public void setMaxDocBytesToAnalyze(int byteCount)
{ {
maxDocBytesToAnalyze = byteCount; maxDocCharsToAnalyze = byteCount;
} }
/** public int getMaxDocCharsToAnalyze() {
return maxDocCharsToAnalyze;
}
public void setMaxDocCharsToAnalyze(int maxDocCharsToAnalyze) {
this.maxDocCharsToAnalyze = maxDocCharsToAnalyze;
}
/**
*/ */
public Fragmenter getTextFragmenter() public Fragmenter getTextFragmenter()
{ {

View File

@ -38,7 +38,7 @@ public class SimpleFragmenter implements Fragmenter
/** /**
* *
* @param fragmentSize size in bytes of each fragment * @param fragmentSize size in number of characters of each fragment
*/ */
public SimpleFragmenter(int fragmentSize) public SimpleFragmenter(int fragmentSize)
{ {
@ -67,7 +67,7 @@ public class SimpleFragmenter implements Fragmenter
} }
/** /**
* @return size in bytes of each fragment * @return size in number of characters of each fragment
*/ */
public int getFragmentSize() public int getFragmentSize()
{ {
@ -75,7 +75,7 @@ public class SimpleFragmenter implements Fragmenter
} }
/** /**
* @param size size in bytes of each fragment * @param size size in characters of each fragment
*/ */
public void setFragmentSize(int size) public void setFragmentSize(int size)
{ {