mirror of https://github.com/apache/lucene.git
remove javadocs warnings
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@990822 13f79535-47bb-0310-9956-ffa450edef68
commit e7f171703e
parent 46f0f8c559
@ -128,7 +128,7 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing
 do this:

-  if ((doc=td.skipTo(target)) != DocsEnum.NO_MORE_DOCS) {
+  if ((doc=td.advance(target)) != DocsEnum.NO_MORE_DOCS) {
     ...
   }
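The hunk above documents the rename of skipTo to advance on the flex postings API. As a rough, editor-added sketch (not part of this commit; the class and method names below are invented for illustration, and the targets are assumed to be in increasing order), driving a DocsEnum looks like this:

  import java.io.IOException;
  import org.apache.lucene.index.DocsEnum;

  final class AdvanceDemo {
    // Print the first document at or after each target docid for one term's postings.
    // Targets must be passed in ascending order, since advance() only moves forward.
    static void printMatches(DocsEnum td, int[] targets) throws IOException {
      for (int target : targets) {
        int doc = td.advance(target);        // was td.skipTo(target) in the old API
        if (doc == DocsEnum.NO_MORE_DOCS) {
          break;                             // postings exhausted
        }
        System.out.println("first doc >= " + target + ": " + doc);
      }
    }
  }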
@ -45,8 +45,8 @@ import org.apache.lucene.util.AttributeImpl;
 with type "eos". The default token type is "word".
 <p>
 A Token can optionally have metadata (a.k.a. Payload) in the form of a variable
-length byte array. Use {@link DocsAndPositionsEnum#getPayloadLength()} and
-{@link DocsAndPositionsEnum#getPayload(byte[], int)} to retrieve the payloads from the index.
+length byte array. Use {@link DocsAndPositionsEnum#getPayload()} to retrieve the
+payloads from the index.

 <br><br>
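For the payload change above, a minimal editor-added sketch of reading payloads through the flex API follows. It assumes getPayload() returns a BytesRef (as it did around this revision) and uses a null check for absent payloads, although some trunk revisions exposed a separate hasPayload() method instead; PayloadDump and its parameter name are invented:

  import java.io.IOException;
  import org.apache.lucene.index.DocsAndPositionsEnum;
  import org.apache.lucene.util.BytesRef;

  final class PayloadDump {
    // Walk one term's postings and print any payload stored at each position.
    static void dump(DocsAndPositionsEnum postings) throws IOException {
      while (postings.nextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) {
        for (int i = 0; i < postings.freq(); i++) {
          int pos = postings.nextPosition();
          BytesRef payload = postings.getPayload();  // replaces getPayloadLength()/getPayload(byte[], int)
          if (payload != null) {
            System.out.println("doc=" + postings.docID() + " pos=" + pos
                + " payload=" + payload.utf8ToString());
          }
        }
      }
    }
  }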
@ -253,7 +253,7 @@ public class Token extends CharTermAttributeImpl
  *
  * </ul>
  * @param positionIncrement the distance from the prior term
- * @see org.apache.lucene.index.TermPositions
+ * @see org.apache.lucene.index.DocsAndPositionsEnum
  */
 public void setPositionIncrement(int positionIncrement) {
   if (positionIncrement < 0)
@ -74,7 +74,7 @@ import org.apache.lucene.util.AttributeSource;
  * <p>
  * Sometimes it is desirable to capture a current state of a <code>TokenStream</code>,
  * e.g., for buffering purposes (see {@link CachingTokenFilter},
- * {@link TeeSinkTokenFilter}). For this usecase
+ * TeeSinkTokenFilter). For this usecase
  * {@link AttributeSource#captureState} and {@link AttributeSource#restoreState}
  * can be used.
  * <p>The {@code TokenStream}-API in Lucene is based on the decorator pattern.
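The paragraph above mentions captureState/restoreState for buffering. Here is a minimal editor-added sketch of that pattern (the RepeatFilter name is invented, and a real filter would usually also set the replayed token's position increment to 0):

  import java.io.IOException;
  import org.apache.lucene.analysis.TokenFilter;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.util.AttributeSource;

  // Emits every token twice by capturing the attribute state and replaying it.
  final class RepeatFilter extends TokenFilter {
    private AttributeSource.State saved;

    RepeatFilter(TokenStream input) {
      super(input);
    }

    @Override
    public boolean incrementToken() throws IOException {
      if (saved != null) {
        restoreState(saved);   // replay the buffered token's attribute values
        saved = null;
        return true;
      }
      if (!input.incrementToken()) {
        return false;
      }
      saved = captureState();  // remember the current attribute values
      return true;
    }
  }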
@ -98,27 +98,22 @@ There are many post tokenization steps that can be done, including (but not limi
 </ul>
 </p>
 <p>
-Lucene Java provides a number of analysis capabilities, the most commonly used one being the {@link
-org.apache.lucene.analysis.standard.StandardAnalyzer}. Many applications will have a long and industrious life with nothing more
+Lucene Java provides a number of analysis capabilities, the most commonly used one being the StandardAnalyzer.
+Many applications will have a long and industrious life with nothing more
 than the StandardAnalyzer. However, there are a few other classes/packages that are worth mentioning:
 <ol>
-  <li>{@link org.apache.lucene.analysis.PerFieldAnalyzerWrapper} – Most Analyzers perform the same operation on all
+  <li>PerFieldAnalyzerWrapper – Most Analyzers perform the same operation on all
     {@link org.apache.lucene.document.Field}s. The PerFieldAnalyzerWrapper can be used to associate a different Analyzer with different
     {@link org.apache.lucene.document.Field}s.</li>
   <li>The modules/analysis library located at the root of the Lucene distribution has a number of different Analyzer implementations to solve a variety
     of different problems related to searching. Many of the Analyzers are designed to analyze non-English languages.</li>
-  <li>The contrib/snowball library
-    located at the root of the Lucene distribution has Analyzer and TokenFilter
-    implementations for a variety of Snowball stemmers.
-    See <a href="http://snowball.tartarus.org">http://snowball.tartarus.org</a>
-    for more information on Snowball stemmers.</li>
   <li>There are a variety of Tokenizer and TokenFilter implementations in this package. Take a look around, chances are someone has implemented what you need.</li>
 </ol>
 </p>
 <p>
 Analysis is one of the main causes of performance degradation during indexing. Simply put, the more you analyze the slower the indexing (in most cases).
-Perhaps your application would be just fine using the simple {@link org.apache.lucene.analysis.WhitespaceTokenizer} combined with a
-{@link org.apache.lucene.analysis.StopFilter}. The contrib/benchmark library can be useful for testing out the speed of the analysis process.
+Perhaps your application would be just fine using the simple WhitespaceTokenizer combined with a StopFilter. The contrib/benchmark library can be useful
+for testing out the speed of the analysis process.
 </p>
 <h2>Invoking the Analyzer</h2>
 <p>
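Since the list above points at PerFieldAnalyzerWrapper, here is a small editor-added wiring sketch. The field name is made up, and the no-argument analyzer constructors were being replaced by Version-taking ones on trunk at this time, so the exact constructors should be adapted to the revision in use:

  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.analysis.KeywordAnalyzer;
  import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
  import org.apache.lucene.analysis.SimpleAnalyzer;

  final class PerFieldSetup {
    // Analyze most fields with SimpleAnalyzer, but keep "id" values as single tokens.
    static Analyzer build() {
      PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
      wrapper.addAnalyzer("id", new KeywordAnalyzer());
      return wrapper;
    }
  }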
@ -229,7 +224,7 @@ the source code of any one of the many samples located in this package.
 public TokenStream tokenStream(final String fieldName, Reader reader) {
   final TokenStream ts = someAnalyzer.tokenStream(fieldName, reader);
   TokenStream res = new TokenStream() {
-    TermAttribute termAtt = addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);

     public boolean incrementToken() throws IOException {
@ -237,7 +232,7 @@ the source code of any one of the many samples located in this package.
       while (true) {
         boolean hasNext = ts.incrementToken();
         if (hasNext) {
-          if (stopWords.contains(termAtt.term())) {
+          if (stopWords.contains(termAtt.toString())) {
             extraIncrement++; // filter this word
             continue;
           }
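The fragment above, taken from the package documentation's example, drops stop words while counting the skipped positions. As an editor-added, self-contained sketch of the same idea, written as a TokenFilter rather than the anonymous TokenStream used in the documentation (class and variable names are invented):

  import java.io.IOException;
  import java.util.Set;
  import org.apache.lucene.analysis.TokenFilter;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

  // Drops stop words but adds their position increments to the next kept token,
  // so phrase and position-aware queries still see the original gaps.
  final class PositionPreservingStopFilter extends TokenFilter {
    private final Set<String> stopWords;
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);

    PositionPreservingStopFilter(TokenStream input, Set<String> stopWords) {
      super(input);
      this.stopWords = stopWords;
    }

    @Override
    public boolean incrementToken() throws IOException {
      int extraIncrement = 0;
      while (input.incrementToken()) {
        if (stopWords.contains(termAtt.toString())) {
          extraIncrement += posIncrAtt.getPositionIncrement(); // remember the gap
          continue;
        }
        posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + extraIncrement);
        return true;
      }
      return false;
    }
  }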
@ -282,7 +277,7 @@ is necessary that can transport custom types of data from the documents to the i
 <h3>Attribute and AttributeSource</h3>
 Lucene 2.9 therefore introduces a new pair of classes called {@link org.apache.lucene.util.Attribute} and
 {@link org.apache.lucene.util.AttributeSource}. An Attribute serves as a
-particular piece of information about a text token. For example, {@link org.apache.lucene.analysis.tokenattributes.TermAttribute}
+particular piece of information about a text token. For example, {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}
 contains the term text of a token, and {@link org.apache.lucene.analysis.tokenattributes.OffsetAttribute} contains the start and end character offsets of a token.
 An AttributeSource is a collection of Attributes with a restriction: there may be only one instance of each attribute type. TokenStream now extends AttributeSource, which
 means that one can add Attributes to a TokenStream. Since TokenFilter extends TokenStream, all filters are also
@ -290,7 +285,7 @@ AttributeSources.
 <p>
 Lucene now provides six Attributes out of the box, which replace the variables the Token class has:
 <ul>
-  <li>{@link org.apache.lucene.analysis.tokenattributes.TermAttribute}<p>The term text of a token.</p></li>
+  <li>{@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}<p>The term text of a token.</p></li>
   <li>{@link org.apache.lucene.analysis.tokenattributes.OffsetAttribute}<p>The start and end offset of token in characters.</p></li>
   <li>{@link org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute}<p>See above for detailed information about position increment.</p></li>
   <li>{@link org.apache.lucene.analysis.tokenattributes.PayloadAttribute}<p>The payload that a Token can optionally have.</p></li>
@ -354,14 +349,14 @@ public class MyAnalyzer extends Analyzer {
 MyAnalyzer analyzer = new MyAnalyzer();
 TokenStream stream = analyzer.tokenStream("field", new StringReader(text));

-// get the TermAttribute from the TokenStream
-TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
+// get the CharTermAttribute from the TokenStream
+CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);

 stream.reset();

 // print all tokens until stream is exhausted
 while (stream.incrementToken()) {
-  System.out.println(termAtt.term());
+  System.out.println(termAtt.toString());
 }

 stream.end()
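To round out the consumer example above, an editor-added sketch of the full consume cycle, including end() and close(); ConsumerDemo is an invented name, and OffsetAttribute is added only to show a second attribute being requested:

  import java.io.IOException;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

  final class ConsumerDemo {
    // Full consumer workflow: addAttribute -> reset -> incrementToken loop -> end -> close.
    static void printTokens(TokenStream stream) throws IOException {
      CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        System.out.println(termAtt.toString()
            + " [" + offsetAtt.startOffset() + "-" + offsetAtt.endOffset() + "]");
      }
      stream.end();    // perform end-of-stream work, e.g. record the final offset
      stream.close();
    }
  }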
@ -370,7 +365,7 @@ public class MyAnalyzer extends Analyzer {
 }
 </pre>
 In this easy example a simple white space tokenization is performed. In main() a loop consumes the stream and
-prints the term text of the tokens by accessing the TermAttribute that the WhitespaceTokenizer provides.
+prints the term text of the tokens by accessing the CharTermAttribute that the WhitespaceTokenizer provides.
 Here is the output:
 <pre>
 This
@ -409,7 +404,7 @@ public final class LengthFilter extends TokenFilter {
   final int min;
   final int max;

-  private TermAttribute termAtt;
+  private CharTermAttribute termAtt;

   /**
    * Build a filter that removes words that are too long or too
@ -420,7 +415,7 @@ public final class LengthFilter extends TokenFilter {
     super(in);
     this.min = min;
     this.max = max;
-    termAtt = addAttribute(TermAttribute.class);
+    termAtt = addAttribute(CharTermAttribute.class);
   }

   /**
@ -431,7 +426,7 @@ public final class LengthFilter extends TokenFilter {
     assert termAtt != null;
     // return the first non-stop word found
     while (input.incrementToken()) {
-      int len = termAtt.termLength();
+      int len = termAtt.length();
       if (len >= min && len <= max) {
         return true;
       }
@ -442,11 +437,11 @@ public final class LengthFilter extends TokenFilter {
   }
 }
 </pre>
-The TermAttribute is added in the constructor and stored in the instance variable <code>termAtt</code>.
-Remember that there can only be a single instance of TermAttribute in the chain, so in our example the
+The CharTermAttribute is added in the constructor and stored in the instance variable <code>termAtt</code>.
+Remember that there can only be a single instance of CharTermAttribute in the chain, so in our example the
 <code>addAttribute()</code> call in LengthFilter returns the TermAttribute that the WhitespaceTokenizer already added. The tokens
 are retrieved from the input stream in the <code>incrementToken()</code> method. By looking at the term text
-in the TermAttribute the length of the term can be determined and too short or too long tokens are skipped.
+in the CharTermAttribute the length of the term can be determined and too short or too long tokens are skipped.
 Note how <code>incrementToken()</code> can efficiently access the instance variable; no attribute lookup
 is neccessary. The same is true for the consumer, which can simply use local references to the Attributes.
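The single-instance rule described above can be checked directly. The following editor-added snippet (names invented) asserts that repeated addAttribute calls on the same stream hand back the identical object, which is why a filter and its consumer end up sharing one CharTermAttribute:

  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

  final class SingleInstanceCheck {
    // An AttributeSource keeps at most one instance per attribute interface.
    static void check(TokenStream stream) {
      CharTermAttribute first = stream.addAttribute(CharTermAttribute.class);
      CharTermAttribute second = stream.addAttribute(CharTermAttribute.class);
      assert first == second;
    }
  }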
@ -521,17 +516,17 @@ that tags every word with a leading upper-case letter as a 'Noun' and all other
 <pre>
   public static class PartOfSpeechTaggingFilter extends TokenFilter {
     PartOfSpeechAttribute posAtt;
-    TermAttribute termAtt;
+    CharTermAttribute termAtt;

     protected PartOfSpeechTaggingFilter(TokenStream input) {
       super(input);
       posAtt = addAttribute(PartOfSpeechAttribute.class);
-      termAtt = addAttribute(TermAttribute.class);
+      termAtt = addAttribute(CharTermAttribute.class);
     }

     public boolean incrementToken() throws IOException {
       if (!input.incrementToken()) {return false;}
-      posAtt.setPartOfSpeech(determinePOS(termAtt.termBuffer(), 0, termAtt.termLength()));
+      posAtt.setPartOfSpeech(determinePOS(termAtt.buffer(), 0, termAtt.length()));
       return true;
     }
@ -577,8 +572,8 @@ to make use of the new PartOfSpeechAttribute and print it out:
 MyAnalyzer analyzer = new MyAnalyzer();
 TokenStream stream = analyzer.tokenStream("field", new StringReader(text));

-// get the TermAttribute from the TokenStream
-TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
+// get the CharTermAttribute from the TokenStream
+CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);

 // get the PartOfSpeechAttribute from the TokenStream
 PartOfSpeechAttribute posAtt = stream.addAttribute(PartOfSpeechAttribute.class);
@ -587,7 +582,7 @@ to make use of the new PartOfSpeechAttribute and print it out:

 // print all tokens until stream is exhausted
 while (stream.incrementToken()) {
-  System.out.println(termAtt.term() + ": " + posAtt.getPartOfSpeech());
+  System.out.println(termAtt.toString() + ": " + posAtt.getPartOfSpeech());
 }

 stream.end();
@ -43,7 +43,7 @@ import org.apache.lucene.util.Attribute;
  *
  * </ul>
  *
- * @see org.apache.lucene.index.TermPositions
+ * @see org.apache.lucene.index.DocsAndPositionsEnum
  */
 public interface PositionIncrementAttribute extends Attribute {
   /** Set the position increment. The default value is one.
@ -30,7 +30,7 @@ import org.apache.lucene.util.ArrayUtil;
  * To store payloads in the index a {@link TokenStream} has to be used that
  * produces payload data.
  * <p>
- * Use {@link TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)}
+ * Use {@link DocsAndPositionsEnum#getPayload()}
  * to retrieve the payloads from the index.<br>
  *
  */
@ -48,8 +48,8 @@ public class SegmentWriteState {
    * tweaking this is rarely useful.*/
   public final int termIndexInterval;

-  /** Expert: The fraction of {@link TermDocs} entries stored in skip tables,
-   * used to accelerate {@link TermDocs#skipTo(int)}. Larger values result in
+  /** Expert: The fraction of TermDocs entries stored in skip tables,
+   * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
    * smaller indexes, greater acceleration, but fewer accelerable cases, while
    * smaller values result in bigger indexes, less acceleration and more
    * accelerable cases. More detailed experiments would be useful here. */
@ -101,7 +101,7 @@ public final class Term implements Comparable<Term>, java.io.Serializable {
  * Therefore the bytes should not be modified after construction, for
  * example, you should clone a copy rather than pass reused bytes from
  * a TermsEnum.
- * @param text The bytes of the new term (field is implicitly same as this Term instance)
+ * @param bytes The bytes of the new term (field is implicitly same as this Term instance)
  * @return A new Term
  */
 public Term createTerm(BytesRef bytes)
@ -51,7 +51,7 @@ public abstract class TermVectorMapper {
  * Tell the mapper what to expect in regards to field, number of terms, offset and position storage.
  * This method will be called once before retrieving the vector for a field.
  *
- * This method will be called before {@link #map(String,int,TermVectorOffsetInfo[],int[])}.
+ * This method will be called before {@link #map(BytesRef,int,TermVectorOffsetInfo[],int[])}.
  * @param field The field the vector is for
  * @param numTerms The number of terms that need to be mapped
  * @param storeOffsets true if the mapper should expect offset information
@ -76,7 +76,7 @@ public abstract class FieldCacheRangeFilter<T> extends Filter {
 public abstract DocIdSet getDocIdSet(IndexReader reader) throws IOException;

 /**
- * Creates a string range filter using {@link FieldCache#getStringIndex}. This works with all
+ * Creates a string range filter using {@link FieldCache#getTermsIndex}. This works with all
  * fields containing zero or one term in the field. The range can be half-open by setting one
  * of the values to <code>null</code>.
  */
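As a usage note for the factory documented above, here is an editor-added one-liner; the field name and bounds are made up for illustration:

  import org.apache.lucene.search.FieldCacheRangeFilter;
  import org.apache.lucene.search.Filter;

  final class RangeFilterExample {
    // Accept documents whose single-valued "author" term lies in ["a", "m"], inclusive on both ends.
    static Filter authorRange() {
      return FieldCacheRangeFilter.newStringRange("author", "a", "m", true, true);
    }
  }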
@ -19,6 +19,7 @@ package org.apache.lucene.search;

 import java.io.IOException;

+import org.apache.lucene.index.DocsEnum; // javadoc @link
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.util.OpenBitSet;
 import org.apache.lucene.util.BytesRef;
@ -40,7 +41,7 @@ import org.apache.lucene.util.BytesRef;
  * <p/>
  *
  * The first invocation of this filter on a given field will
- * be slower, since a {@link FieldCache.StringIndex} must be
+ * be slower, since a {@link FieldCache.DocTermsIndex} must be
  * created. Subsequent invocations using the same field
  * will re-use this cache. However, as with all
  * functionality based on {@link FieldCache}, persistent RAM
@ -699,7 +699,7 @@ public abstract class FieldComparator {
  * ordinals. This is functionally equivalent to {@link
  * TermValComparator}, but it first resolves the string
  * to their relative ordinal positions (using the index
- * returned by {@link FieldCache#getStringIndex}), and
+ * returned by {@link FieldCache#getTermsIndex}), and
  * does most comparisons using the ordinals. For medium
  * to large results, this comparator will be much faster
  * than {@link TermValComparator}. For very small
@ -22,6 +22,7 @@ import java.io.File;
 import java.io.RandomAccessFile;
 import java.nio.ByteBuffer;
 import java.nio.BufferUnderflowException;
+import java.nio.channels.ClosedChannelException; // javadoc @link
 import java.nio.channels.FileChannel;
 import java.nio.channels.FileChannel.MapMode;

@ -20,6 +20,7 @@ package org.apache.lucene.store;
 import java.io.File;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.nio.channels.ClosedChannelException; // javadoc @link
 import java.nio.channels.FileChannel;
 import java.util.concurrent.Future; // javadoc

@ -29,7 +30,7 @@ import java.util.concurrent.Future; // javadoc
  * without synchronizing.
  * <p>
  * This class only uses FileChannel when reading; writing is achieved with
- * {@link SimpleFSDirectory.SimpleFSIndexOutput}.
+ * {@link FSDirectory.FSIndexOutput}.
  * <p>
  * <b>NOTE</b>: NIOFSDirectory is not recommended on Windows because of a bug in
  * how FileChannel.read is implemented in Sun's JRE. Inside of the
@ -115,7 +115,6 @@ public class Automaton implements Serializable, Cloneable {
  * constructor, automata can be constructed manually from {@link State} and
  * {@link Transition} objects.
  *
- * @see #setInitialState(State)
  * @see State
  * @see Transition
  */
@ -238,7 +238,7 @@ public class State implements Serializable, Comparable<State> {
 /**
  * Return this state's number.
  * <p>
- * Expert: Will be useless unless {@link Automaton#setStateNumbers(Set)}
+ * Expert: Will be useless unless {@link Automaton#getNumberedStates}
  * has been called first to number the states.
  * @return the number
  */