diff --git a/lucene/build.xml b/lucene/build.xml index 3ec293b61c6..ba3e349e01a 100644 --- a/lucene/build.xml +++ b/lucene/build.xml @@ -209,18 +209,18 @@ - + - + - + diff --git a/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java index d71d16c62fc..c2ff2c5c474 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java @@ -24,12 +24,23 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.Fields; import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.MergeState; +import org.apache.lucene.index.SegmentWriteState; // javadocs import org.apache.lucene.index.Terms; -/** Abstract API that consumes terms, doc, freq, prox, offset and - * payloads postings. Concrete implementations of this - * actually do "something" with the postings (write it into - * the index in a specific format). +/** + * Abstract API that consumes terms, doc, freq, prox, offset and + * payloads postings. Concrete implementations of this + * actually do "something" with the postings (write it into + * the index in a specific format). + *

+ * The lifecycle is: + *

    + *
  1. FieldsConsumer is created by + * {@link PostingsFormat#fieldsConsumer(SegmentWriteState)}. + *
  2. For each field, {@link #addField(FieldInfo)} is called, + * returning a {@link TermsConsumer} for the field. + *
  3. After all fields are added, the consumer is {@link #close}d. + *
* * @lucene.experimental */ diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsConsumer.java index 8cd3e69ae63..c8c4f8b24f2 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsConsumer.java @@ -28,19 +28,29 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; /** + * Abstract API that consumes postings for an individual term. + *

+ * The lifecycle is: + *

    + *
  1. PostingsConsumer is returned for each term by + * {@link TermsConsumer#startTerm(BytesRef)}. + *
  2. {@link #startDoc(int, int)} is called for each + * document where the term occurs, specifying id + * and term frequency for that document. + *
  3. If positions are enabled for the field, then + * {@link #addPosition(int, BytesRef, int, int)} + * will be called for each occurrence in the + * document. + *
  4. {@link #finishDoc()} is called when the producer + * is done adding positions to the document. + *
+ * * @lucene.experimental */ - public abstract class PostingsConsumer { /** Adds a new doc in this term. */ - public abstract void startDoc(int docID, int termDocFreq) throws IOException; - - public static class PostingsMergeState { - DocsEnum docsEnum; - int[] docMap; - int docBase; - } + public abstract void startDoc(int docID, int freq) throws IOException; /** Add a new position & payload, and start/end offset. A * null payload means no payload; a non-null payload with diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java index d86eb444198..91c17f0456a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java @@ -24,9 +24,17 @@ import org.apache.lucene.store.IndexOutput; import org.apache.lucene.index.FieldInfo; /** + * Extension of {@link PostingsConsumer} to support pluggable term dictionaries. + *

+ * This class contains additional hooks to interact with the provided + * term dictionaries such as {@link BlockTreeTermsWriter} and + * {@link BlockTermsWriter}. If you want to re-use one of these existing + * implementations and are only interested in customizing the format of + * the postings list, extend this class instead. + * + * @see PostingsReaderBase * @lucene.experimental */ - // TODO: find a better name; this defines the API that the // terms dict impls use to talk to a postings impl. // TermsDict + PostingsReader/WriterBase == PostingsConsumer/Producer diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java index 6cfbf5eae94..14d10238c59 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java @@ -20,6 +20,7 @@ package org.apache.lucene.codecs; import java.io.IOException; import java.util.Comparator; +import org.apache.lucene.index.FieldInfo; // javadocs import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.MergeState; import org.apache.lucene.index.TermsEnum; @@ -30,9 +31,25 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; /** + * Abstract API that consumes terms for an individual field. + *

+ * The lifecycle is: + *

    + *
  1. TermsConsumer is returned for each field + * by {@link FieldsConsumer#addField(FieldInfo)}. + *
  2. TermsConsumer returns a {@link PostingsConsumer} for + * each term in {@link #startTerm(BytesRef)}. + *
  3. When the producer (e.g. IndexWriter) + * is done adding documents for the term, it calls + * {@link #finishTerm(BytesRef, TermStats)}, passing in + * the accumulated term statistics. + *
  4. Producer calls {@link #finish(long, long, int)} with + * the accumulated collection statistics when it is finished + * adding terms to the field. + *
+ * * @lucene.experimental */ - public abstract class TermsConsumer { /** Starts a new term in this field; this may be called @@ -50,11 +67,11 @@ public abstract class TermsConsumer { * before feeding to this API. */ public abstract Comparator getComparator() throws IOException; - /** Default merge impl */ private MappingMultiDocsEnum docsEnum; private MappingMultiDocsEnum docsAndFreqsEnum; private MappingMultiDocsAndPositionsEnum postingsEnum; + /** Default merge impl */ public void merge(MergeState mergeState, TermsEnum termsEnum) throws IOException { BytesRef term; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermsIndexReaderBase.java b/lucene/core/src/java/org/apache/lucene/codecs/TermsIndexReaderBase.java index fe4a2bdcb84..7f79f0aede9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/TermsIndexReaderBase.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/TermsIndexReaderBase.java @@ -30,9 +30,9 @@ import java.io.Closeable; // frequent indexing /** - * TermsDictReader interacts with an instance of this class + * {@link BlockTermsReader} interacts with an instance of this class * to manage its terms index. The writer must accept - * indexed terms (many pairs of CharSequence text + long + * indexed terms (many pairs of BytesRef text + long * fileOffset), and then this reader must be able to * retrieve the nearest index term to a provided term * text. @@ -48,13 +48,15 @@ public abstract class TermsIndexReaderBase implements Closeable { public abstract int getDivisor(); - // Similar to TermsEnum, except, the only "metadata" it - // reports for a given indexed term is the long fileOffset - // into the main terms dict (_X.tis) file: + /** + * Similar to TermsEnum, except, the only "metadata" it + * reports for a given indexed term is the long fileOffset + * into the main terms dictionary file: + */ public static abstract class FieldIndexEnum { /** Seeks to "largest" indexed term that's <= - * term; retruns file pointer index (into the main + * term; returns file pointer index (into the main * terms index file) for that term */ public abstract long seek(BytesRef term) throws IOException; @@ -63,8 +65,10 @@ public abstract class TermsIndexReaderBase implements Closeable { public abstract BytesRef term(); - // Only impl'd if supportsOrd() returns true! + /** Only implemented if {@link TermsIndexReaderBase#supportsOrd()} returns true. */ public abstract long seek(long ord) throws IOException; + + /** Only implemented if {@link TermsIndexReaderBase#supportsOrd()} returns true. */ public abstract long ord(); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermsIndexWriterBase.java b/lucene/core/src/java/org/apache/lucene/codecs/TermsIndexWriterBase.java index bc98b9c1aa9..4c2d2eb409c 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/TermsIndexWriterBase.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/TermsIndexWriterBase.java @@ -23,9 +23,18 @@ import org.apache.lucene.util.BytesRef; import java.io.Closeable; import java.io.IOException; -/** @lucene.experimental */ +/** + * Base class for terms index implementations to plug + * into {@link BlockTermsWriter}. + * + * @see TermsIndexReaderBase + * @lucene.experimental + */ public abstract class TermsIndexWriterBase implements Closeable { + /** + * Terms index API for a single field. + */ public abstract class FieldWriter { public abstract boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException; public abstract void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexInput.java b/lucene/core/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexInput.java index 1760ec59f71..cdc7f165929 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexInput.java @@ -66,6 +66,11 @@ public abstract class FixedIntBlockIndexInput extends IntIndexInput { protected abstract BlockReader getBlockReader(IndexInput in, int[] buffer) throws IOException; + /** + * Interface for fixed-size block decoders. + *

+ * Implementations should decode into the buffer in {@link #readBlock}. + */ public interface BlockReader { public void readBlock() throws IOException; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexInput.java b/lucene/core/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexInput.java index c8a0b12bc41..e2619da6321 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexInput.java @@ -68,6 +68,11 @@ public abstract class VariableIntBlockIndexInput extends IntIndexInput { protected abstract BlockReader getBlockReader(IndexInput in, int[] buffer) throws IOException; + /** + * Interface for variable-size block decoders. + *

+ * Implementations should decode into the buffer in {@link #readBlock}. + */ public interface BlockReader { public int readBlock() throws IOException; public void seek(long pos) throws IOException; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesConsumer.java index df1de4d5c3d..89afc2a6932 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesConsumer.java @@ -31,6 +31,10 @@ import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.IOUtils; /** + * Writes plain-text DocValues. + *

+ * FOR RECREATIONAL USE ONLY + * * @lucene.experimental */ public class SimpleTextDocValuesConsumer extends DocValuesConsumer { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java index b33186783a4..9ac8240c394 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java @@ -26,7 +26,12 @@ import org.apache.lucene.index.PerDocWriteState; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.util.BytesRef; + /** + * Plain-text DocValues format. + *

+ * FOR RECREATIONAL USE ONLY + * * @lucene.experimental */ public class SimpleTextDocValuesFormat extends DocValuesFormat { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java index 598a1f0f943..b241027812e 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java @@ -38,7 +38,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; /** - * plain-text norms format + * plain-text norms format. *

* FOR RECREATIONAL USE ONLY * @@ -63,6 +63,13 @@ public class SimpleTextNormsFormat extends NormsFormat { SimpleTextNormsPerDocConsumer.files(info, files); } + /** + * Reads plain-text norms. + *

+ * FOR RECREATIONAL USE ONLY + * + * @lucene.experimental + */ public static class SimpleTextNormsPerDocProducer extends SimpleTextPerDocProducer { @@ -88,6 +95,13 @@ public class SimpleTextNormsFormat extends NormsFormat { } + /** + * Writes plain-text norms. + *

+ * FOR RECREATIONAL USE ONLY + * + * @lucene.experimental + */ public static class SimpleTextNormsPerDocConsumer extends SimpleTextPerDocConsumer { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocProducer.java index 59e49c615a4..239d795ec08 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocProducer.java @@ -47,6 +47,10 @@ import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.packed.PackedInts.Reader; /** + * Reads plain-text DocValues. + *

+ * FOR RECREATIONAL USE ONLY + * * @lucene.experimental */ public class SimpleTextPerDocProducer extends PerDocProducerBase { diff --git a/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java b/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java index 76db21f0dad..6e2564b0dcb 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java @@ -51,6 +51,12 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; +/** + * Example servlet that uses the XML queryparser. + *

+ * NOTE: you must provide CSV data in /WEB-INF/data.tsv + * for the demo to work! + */ public class FormBasedXmlQueryDemo extends HttpServlet { private QueryTemplateManager queryTemplateManager; diff --git a/lucene/misc/src/java/org/apache/lucene/store/NativePosixUtil.java b/lucene/misc/src/java/org/apache/lucene/store/NativePosixUtil.java index a8bb2b9eb7b..7c78e851313 100644 --- a/lucene/misc/src/java/org/apache/lucene/store/NativePosixUtil.java +++ b/lucene/misc/src/java/org/apache/lucene/store/NativePosixUtil.java @@ -21,6 +21,10 @@ import java.io.IOException; import java.io.FileDescriptor; import java.nio.ByteBuffer; +/** + * Provides JNI access to native methods such as madvise() for + * {@link NativeUnixDirectory} + */ public final class NativePosixUtil { public final static int NORMAL = 0; public final static int SEQUENTIAL = 1; diff --git a/lucene/misc/src/java/org/apache/lucene/store/WindowsDirectory.java b/lucene/misc/src/java/org/apache/lucene/store/WindowsDirectory.java index a36cd7ed2b3..19414345773 100644 --- a/lucene/misc/src/java/org/apache/lucene/store/WindowsDirectory.java +++ b/lucene/misc/src/java/org/apache/lucene/store/WindowsDirectory.java @@ -75,7 +75,7 @@ public class WindowsDirectory extends FSDirectory { return new WindowsIndexInput(new File(getDirectory(), name), Math.max(BufferedIndexInput.bufferSize(context), DEFAULT_BUFFERSIZE)); } - protected static class WindowsIndexInput extends BufferedIndexInput { + static class WindowsIndexInput extends BufferedIndexInput { private final long fd; private final long length; boolean isClone; diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java index 9e570671e17..3baf9a19432 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java @@ -34,6 +34,12 @@ import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.UnicodeUtil; +/** + * Suggest implementation based on + * JaSpell. + * + * @see JaspellTernarySearchTrie + */ public class JaspellLookup extends Lookup { JaspellTernarySearchTrie trie = new JaspellTernarySearchTrie(); private boolean usePrefix = true; diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java index 4c6da404118..48b570c1b46 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java @@ -19,6 +19,11 @@ package org.apache.lucene.search.suggest.tst; import java.util.*; +/** + * Ternary Search Trie implementation. + * + * @see TernaryTreeNode + */ public class TSTAutocomplete { /** diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java index dc656e4aeeb..130fc714290 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java @@ -33,6 +33,12 @@ import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.UnicodeUtil; +/** + * Suggest implementation based on a + * Ternary Search Tree + * + * @see TSTAutocomplete + */ public class TSTLookup extends Lookup { TernaryTreeNode root = new TernaryTreeNode(); TSTAutocomplete autocomplete = new TSTAutocomplete();