diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt
index 424fcacb788..542759777da 100644
--- a/lucene/MIGRATE.txt
+++ b/lucene/MIGRATE.txt
@@ -128,7 +128,7 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing
   do this:
 
-    if ((doc=td.skipTo(target)) != DocsEnum.NO_MORE_DOCS) {
+    if ((doc=td.advance(target)) != DocsEnum.NO_MORE_DOCS) {
       ...
     }
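For reference, the surrounding iteration idiom in the new API, as a minimal sketch: "td" is assumed to be a DocsEnum the caller has already obtained for some term, and process() is a hypothetical per-document callback.

    static void skipThenIterate(DocsEnum td, int target) throws IOException {
      // advance() positions the enum on the first document whose id is >= target
      int doc = td.advance(target);
      while (doc != DocsEnum.NO_MORE_DOCS) {
        process(doc);        // hypothetical per-document work
        doc = td.nextDoc();  // then step through the remaining documents
      }
    }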

diff --git a/lucene/src/java/org/apache/lucene/analysis/Token.java b/lucene/src/java/org/apache/lucene/analysis/Token.java
index 6be3a764218..a50b934377c 100644
--- a/lucene/src/java/org/apache/lucene/analysis/Token.java
+++ b/lucene/src/java/org/apache/lucene/analysis/Token.java
@@ -45,8 +45,8 @@ import org.apache.lucene.util.AttributeImpl;
   with type "eos". The default token type is "word".
 
   A Token can optionally have metadata (a.k.a. Payload) in the form of a variable
-  length byte array. Use {@link DocsAndPositionsEnum#getPayloadLength()} and
-  {@link DocsAndPositionsEnum#getPayload(byte[], int)} to retrieve the payloads from the index.
+  length byte array. Use {@link DocsAndPositionsEnum#getPayload()} to retrieve the
+  payloads from the index.

@@ -253,7 +253,7 @@ public class Token extends CharTermAttributeImpl
    *
    *
    * @param positionIncrement the distance from the prior term
-   * @see org.apache.lucene.index.TermPositions
+   * @see org.apache.lucene.index.DocsAndPositionsEnum
    */
   public void setPositionIncrement(int positionIncrement) {
     if (positionIncrement < 0)
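The two Token hunks above touch payloads and position increments; a minimal sketch of producing a token that carries both (the term text, offsets, increment, and payload bytes are arbitrary):

    Token token = new Token("example", 0, 7);          // term text plus start/end offsets
    token.setPositionIncrement(2);                     // e.g. one stop word was removed just before this token
    token.setPayload(new Payload(new byte[] { 42 }));  // arbitrary per-position metadata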

diff --git a/lucene/src/java/org/apache/lucene/analysis/TokenStream.java b/lucene/src/java/org/apache/lucene/analysis/TokenStream.java
index 6fb7e8cc4bb..606bdef4aae 100644
--- a/lucene/src/java/org/apache/lucene/analysis/TokenStream.java
+++ b/lucene/src/java/org/apache/lucene/analysis/TokenStream.java
@@ -74,7 +74,7 @@ import org.apache.lucene.util.AttributeSource;
  *
  * Sometimes it is desirable to capture a current state of a TokenStream,
  * e.g., for buffering purposes (see {@link CachingTokenFilter},
- * {@link TeeSinkTokenFilter}). For this usecase
+ * TeeSinkTokenFilter). For this usecase
  * {@link AttributeSource#captureState} and {@link AttributeSource#restoreState}
  * can be used.
  *

 The {@code TokenStream}-API in Lucene is based on the decorator pattern.
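A minimal sketch of the captureState/restoreState buffering idiom mentioned in the hunk above, as a filter that emits every token twice; the class name is illustrative only, and a real filter would also adjust the position increment of the repeated token:

    import java.io.IOException;
    import org.apache.lucene.analysis.TokenFilter;
    import org.apache.lucene.analysis.TokenStream;

    public final class RepeatFilter extends TokenFilter {
      private State saved;  // snapshot of all attributes of the last token

      public RepeatFilter(TokenStream input) {
        super(input);
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (saved != null) {       // replay the buffered token once
          restoreState(saved);
          saved = null;
          return true;
        }
        if (!input.incrementToken()) {
          return false;            // underlying stream is exhausted
        }
        saved = captureState();    // buffer the token we are about to return
        return true;
      }
    }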

diff --git a/lucene/src/java/org/apache/lucene/analysis/package.html b/lucene/src/java/org/apache/lucene/analysis/package.html
index eb23fc7d1ac..d98f84f5d66 100644
--- a/lucene/src/java/org/apache/lucene/analysis/package.html
+++ b/lucene/src/java/org/apache/lucene/analysis/package.html
@@ -98,27 +98,22 @@ There are many post tokenization steps that can be done, including (but not limi
-  Lucene Java provides a number of analysis capabilities, the most commonly used one being the {@link
-  org.apache.lucene.analysis.standard.StandardAnalyzer}. Many applications will have a long and industrious life with nothing more
+  Lucene Java provides a number of analysis capabilities, the most commonly used one being the StandardAnalyzer.
+  Many applications will have a long and industrious life with nothing more
   than the StandardAnalyzer. However, there are a few other classes/packages that are worth mentioning:

-  <li>{@link org.apache.lucene.analysis.PerFieldAnalyzerWrapper} – Most Analyzers perform the same operation on all
+  <li>PerFieldAnalyzerWrapper – Most Analyzers perform the same operation on all
     {@link org.apache.lucene.document.Field}s. The PerFieldAnalyzerWrapper can be used to associate a different Analyzer with different
     {@link org.apache.lucene.document.Field}s.</li>
   <li>The modules/analysis library located at the root of the Lucene distribution has a number of different Analyzer implementations
     to solve a variety of different problems related to searching. Many of the Analyzers are designed to analyze non-English languages.</li>
-  <li>The contrib/snowball library
-    located at the root of the Lucene distribution has Analyzer and TokenFilter
-    implementations for a variety of Snowball stemmers.
-    See http://snowball.tartarus.org
-    for more information on Snowball stemmers.</li>
   <li>There are a variety of Tokenizer and TokenFilter implementations in this package. Take a look around, chances are someone has implemented what you need.</li>

   Analysis is one of the main causes of performance degradation during indexing. Simply put, the more you analyze the slower the indexing (in most cases).
-  Perhaps your application would be just fine using the simple {@link org.apache.lucene.analysis.WhitespaceTokenizer} combined with a
-  {@link org.apache.lucene.analysis.StopFilter}. The contrib/benchmark library can be useful for testing out the speed of the analysis process.
+  Perhaps your application would be just fine using the simple WhitespaceTokenizer combined with a StopFilter. The contrib/benchmark library can be useful
+  for testing out the speed of the analysis process.
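A sketch of that lightweight combination as an Analyzer; constructor signatures follow the trunk style of this snapshot (Version argument first) and may need adjusting:

    import java.io.Reader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.StopAnalyzer;
    import org.apache.lucene.analysis.StopFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.WhitespaceTokenizer;
    import org.apache.lucene.util.Version;

    public final class FastAnalyzer extends Analyzer {
      @Override
      public TokenStream tokenStream(String fieldName, Reader reader) {
        TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
        // drop common English stop words; fewer emitted tokens means less indexing work
        return new StopFilter(Version.LUCENE_CURRENT, ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
      }
    }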

Invoking the Analyzer
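Before the hunks below, the consuming side of the attribute API in one place, as a sketch; the field name "body" is arbitrary:

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    static void printTerms(Analyzer analyzer, String text) throws IOException {
      TokenStream ts = analyzer.tokenStream("body", new StringReader(text));
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      ts.reset();                    // prepare the stream for consumption
      while (ts.incrementToken()) {  // advance to the next token, if any
        System.out.println(termAtt.toString());
      }
      ts.end();
      ts.close();
    }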

@@ -229,7 +224,7 @@ the source code of any one of the many samples located in this package.
   public TokenStream tokenStream(final String fieldName, Reader reader) {
     final TokenStream ts = someAnalyzer.tokenStream(fieldName, reader);
     TokenStream res = new TokenStream() {
-      TermAttribute termAtt = addAttribute(TermAttribute.class);
+      CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
       PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
 
       public boolean incrementToken() throws IOException {
@@ -237,7 +232,7 @@ the source code of any one of the many samples located in this package.
         while (true) {
           boolean hasNext = ts.incrementToken();
           if (hasNext) {
-            if (stopWords.contains(termAtt.term())) {
+            if (stopWords.contains(termAtt.toString())) {
               extraIncrement++; // filter this word
               continue;
             }
@@ -282,7 +277,7 @@ is necessary that can transport custom types of data from the documents to the i

Attribute and AttributeSource

 Lucene 2.9 therefore introduces a new pair of classes called {@link org.apache.lucene.util.Attribute} and
 {@link org.apache.lucene.util.AttributeSource}. An Attribute serves as a
-particular piece of information about a text token. For example, {@link org.apache.lucene.analysis.tokenattributes.TermAttribute}
+particular piece of information about a text token. For example, {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}
 contains the term text of a token, and {@link org.apache.lucene.analysis.tokenattributes.OffsetAttribute} contains the start and end
 character offsets of a token. An AttributeSource is a collection of Attributes with a restriction: there may be only one instance of
 each attribute type. TokenStream now extends AttributeSource, which means that one can add Attributes to a TokenStream. Since
 TokenFilter extends TokenStream, all filters are also
@@ -290,7 +285,7 @@ AttributeSources.

Lucene now provides six Attributes out of the box, which replace the variables the Token class has:
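The single-instance restriction described above is observable directly; repeated addAttribute() calls on the same stream hand back the same object (sketch, with "ts" being any TokenStream):

    CharTermAttribute first = ts.addAttribute(CharTermAttribute.class);
    CharTermAttribute second = ts.addAttribute(CharTermAttribute.class);
    assert first == second;  // one instance per attribute type, shared by producer and consumer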

diff --git a/lucene/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java b/lucene/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java
  *
- * @see org.apache.lucene.index.TermPositions
+ * @see org.apache.lucene.index.DocsAndPositionsEnum
  */
 public interface PositionIncrementAttribute extends Attribute {
   /** Set the position increment. The default value is one.
diff --git a/lucene/src/java/org/apache/lucene/index/Payload.java b/lucene/src/java/org/apache/lucene/index/Payload.java
index 50a99129d69..f7639a3830f 100644
--- a/lucene/src/java/org/apache/lucene/index/Payload.java
+++ b/lucene/src/java/org/apache/lucene/index/Payload.java
@@ -30,7 +30,7 @@ import org.apache.lucene.util.ArrayUtil;
  * To store payloads in the index a {@link TokenStream} has to be used that
  * produces payload data.
  *

- * Use {@link TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)}
+ * Use {@link DocsAndPositionsEnum#getPayload()}
  * to retrieve the payloads from the index.
  *
  */
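The retrieval side referenced by the new Payload javadoc above, as a sketch: "dpe" is assumed to be a DocsAndPositionsEnum already positioned on a document via nextDoc() or advance().

    int freq = dpe.freq();                   // number of positions in the current document
    for (int i = 0; i < freq; i++) {
      int position = dpe.nextPosition();     // consume the next position
      if (dpe.hasPayload()) {
        BytesRef payload = dpe.getPayload(); // only valid until the enum is advanced again
        // decode payload.bytes[payload.offset .. payload.offset + payload.length)
      }
    }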
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
index f94d6bc9bac..a03bb0b0934 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
@@ -48,8 +48,8 @@ public class SegmentWriteState {
    * tweaking this is rarely useful.*/
   public final int termIndexInterval;
 
-  /** Expert: The fraction of {@link TermDocs} entries stored in skip tables,
-   * used to accelerate {@link TermDocs#skipTo(int)}. Larger values result in
+  /** Expert: The fraction of TermDocs entries stored in skip tables,
+   * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
    * smaller indexes, greater acceleration, but fewer accelerable cases, while
    * smaller values result in bigger indexes, less acceleration and more
    * accelerable cases. More detailed experiments would be useful here. */
diff --git a/lucene/src/java/org/apache/lucene/index/Term.java b/lucene/src/java/org/apache/lucene/index/Term.java
index d69c4811db5..3903255c58a 100644
--- a/lucene/src/java/org/apache/lucene/index/Term.java
+++ b/lucene/src/java/org/apache/lucene/index/Term.java
@@ -101,7 +101,7 @@ public final class Term implements Comparable<Term>, java.io.Serializable {
    * Therefore the bytes should not be modified after construction, for
    * example, you should clone a copy rather than pass reused bytes from
    * a TermsEnum.
-   * @param text The bytes of the new term (field is implicitly same as this Term instance)
+   * @param bytes The bytes of the new term (field is implicitly same as this Term instance)
    * @return A new Term
    */
   public Term createTerm(BytesRef bytes)
diff --git a/lucene/src/java/org/apache/lucene/index/TermVectorMapper.java b/lucene/src/java/org/apache/lucene/index/TermVectorMapper.java
index c0da5bb25ec..bdca6327923 100644
--- a/lucene/src/java/org/apache/lucene/index/TermVectorMapper.java
+++ b/lucene/src/java/org/apache/lucene/index/TermVectorMapper.java
@@ -51,7 +51,7 @@ public abstract class TermVectorMapper {
    * Tell the mapper what to expect in regards to field, number of terms, offset and position storage.
    * This method will be called once before retrieving the vector for a field.
    *
-   * This method will be called before {@link #map(String,int,TermVectorOffsetInfo[],int[])}.
+   * This method will be called before {@link #map(BytesRef,int,TermVectorOffsetInfo[],int[])}.
    * @param field The field the vector is for
    * @param numTerms The number of terms that need to be mapped
    * @param storeOffsets true if the mapper should expect offset information
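The createTerm() javadoc in the Term.java hunk above asks for a defensive copy; a sketch of that, where "termsEnum" and "template" are assumed to exist in the caller:

    BytesRef reused = termsEnum.term();  // owned by the enum and overwritten on the next call
    byte[] copy = new byte[reused.length];
    System.arraycopy(reused.bytes, reused.offset, copy, 0, reused.length);
    Term term = template.createTerm(new BytesRef(copy));  // the Term now owns stable bytes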
diff --git a/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java b/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
index 13f12505f6a..43ccb0d55ea 100644
--- a/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
+++ b/lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
@@ -76,7 +76,7 @@ public abstract class FieldCacheRangeFilter extends Filter {
   public abstract DocIdSet getDocIdSet(IndexReader reader) throws IOException;
 
   /**
-   * Creates a string range filter using {@link FieldCache#getStringIndex}. This works with all
+   * Creates a string range filter using {@link FieldCache#getTermsIndex}. This works with all
    * fields containing zero or one term in the field. The range can be half-open by setting one
    * of the values to null.
    */
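Usage sketch for the factory documented above; the field name is arbitrary, and the second call shows the half-open form with a null bound:

    Filter bounded  = FieldCacheRangeFilter.newStringRange("name", "apple", "banana", true, false);
    Filter halfOpen = FieldCacheRangeFilter.newStringRange("name", "apple", null, true, false);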

diff --git a/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java b/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java
index f95a51775e9..57f8be754a4 100644
--- a/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java
+++ b/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java
@@ -19,6 +19,7 @@ package org.apache.lucene.search;
 import java.io.IOException;
+import org.apache.lucene.index.DocsEnum; // javadoc @link
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.util.OpenBitSet;
 import org.apache.lucene.util.BytesRef;
@@ -40,7 +41,7 @@ import org.apache.lucene.util.BytesRef;
 *
 *
 * The first invocation of this filter on a given field will
- * be slower, since a {@link FieldCache.StringIndex} must be
+ * be slower, since a {@link FieldCache.DocTermsIndex} must be
 * created. Subsequent invocations using the same field
 * will re-use this cache. However, as with all
 * functionality based on {@link FieldCache}, persistent RAM
diff --git a/lucene/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/src/java/org/apache/lucene/search/FieldComparator.java
index 358d5d32f23..9b140fff64b 100644
--- a/lucene/src/java/org/apache/lucene/search/FieldComparator.java
+++ b/lucene/src/java/org/apache/lucene/search/FieldComparator.java
@@ -699,7 +699,7 @@ public abstract class FieldComparator {
    * ordinals. This is functionally equivalent to {@link
    * TermValComparator}, but it first resolves the string
    * to their relative ordinal positions (using the index
-   * returned by {@link FieldCache#getStringIndex}), and
+   * returned by {@link FieldCache#getTermsIndex}), and
    * does most comparisons using the ordinals. For medium
    * to large results, this comparator will be much faster
    * than {@link TermValComparator}. For very small
diff --git a/lucene/src/java/org/apache/lucene/store/MMapDirectory.java b/lucene/src/java/org/apache/lucene/store/MMapDirectory.java
index 6a394c4509e..58be3678ccb 100644
--- a/lucene/src/java/org/apache/lucene/store/MMapDirectory.java
+++ b/lucene/src/java/org/apache/lucene/store/MMapDirectory.java
@@ -22,6 +22,7 @@ import java.io.File;
 import java.io.RandomAccessFile;
 import java.nio.ByteBuffer;
 import java.nio.BufferUnderflowException;
+import java.nio.channels.ClosedChannelException; // javadoc @link
 import java.nio.channels.FileChannel;
 import java.nio.channels.FileChannel.MapMode;
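The ClosedChannelException links being added in MMapDirectory above and NIOFSDirectory below document the same hazard, sketched here: interrupting a thread blocked on channel I/O closes the channel, and other threads reading the same file then fail. "in" is assumed to be an IndexInput opened on one of these directories.

    try {
      in.readBytes(buffer, 0, buffer.length);
    } catch (ClosedChannelException e) {
      // some thread was interrupted while reading this file; callers must
      // reopen the affected input (or directory) to recover
    }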

diff --git a/lucene/src/java/org/apache/lucene/store/NIOFSDirectory.java b/lucene/src/java/org/apache/lucene/store/NIOFSDirectory.java
index 5e28cf501aa..2a18262a2ab 100644
--- a/lucene/src/java/org/apache/lucene/store/NIOFSDirectory.java
+++ b/lucene/src/java/org/apache/lucene/store/NIOFSDirectory.java
@@ -20,6 +20,7 @@ package org.apache.lucene.store;
 import java.io.File;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.nio.channels.ClosedChannelException; // javadoc @link
 import java.nio.channels.FileChannel;
 import java.util.concurrent.Future; // javadoc
@@ -29,7 +30,7 @@ import java.util.concurrent.Future; // javadoc
 * without synchronizing.
 *
 * This class only uses FileChannel when reading; writing is achieved with
- * {@link SimpleFSDirectory.SimpleFSIndexOutput}.
+ * {@link FSDirectory.FSIndexOutput}.
 *

 * NOTE: NIOFSDirectory is not recommended on Windows because of a bug in
 * how FileChannel.read is implemented in Sun's JRE. Inside of the
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/Automaton.java b/lucene/src/java/org/apache/lucene/util/automaton/Automaton.java
index ff5558995e9..aed2e52dabb 100644
--- a/lucene/src/java/org/apache/lucene/util/automaton/Automaton.java
+++ b/lucene/src/java/org/apache/lucene/util/automaton/Automaton.java
@@ -115,7 +115,6 @@ public class Automaton implements Serializable, Cloneable {
  * constructor, automata can be constructed manually from {@link State} and
  * {@link Transition} objects.
  *
- * @see #setInitialState(State)
  * @see State
  * @see Transition
  */
diff --git a/lucene/src/java/org/apache/lucene/util/automaton/State.java b/lucene/src/java/org/apache/lucene/util/automaton/State.java
index ab833db9b73..b19c7c9b27e 100644
--- a/lucene/src/java/org/apache/lucene/util/automaton/State.java
+++ b/lucene/src/java/org/apache/lucene/util/automaton/State.java
@@ -238,7 +238,7 @@ public class State implements Serializable, Comparable<State> {
   /**
    * Return this state's number.
    *

-   * Expert: Will be useless unless {@link Automaton#setStateNumbers(Set)}
+   * Expert: Will be useless unless {@link Automaton#getNumberedStates}
    * has been called first to number the states.
    * @return the number
    */