From a53962cf5f152e1a847d103967b744f9a40fe91d Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 22 Jan 2013 00:01:38 +0000 Subject: [PATCH] javadocs git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1436696 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/codecs/DocValuesConsumer.java | 3 --- .../lucene/codecs/lucene40/package.html | 10 ++++---- .../lucene/codecs/lucene41/package.html | 10 ++++---- .../lucene/codecs/lucene42/package.html | 10 ++++---- .../perfield/PerFieldDocValuesFormat.java | 3 --- .../org/apache/lucene/document/FieldType.java | 2 +- .../org/apache/lucene/index/AtomicReader.java | 4 +-- .../apache/lucene/index/BinaryDocValues.java | 7 ++++++ .../apache/lucene/index/NumericDocValues.java | 9 +++++++ .../apache/lucene/index/SortedDocValues.java | 25 +++++++++++++++++++ .../java/org/apache/lucene/index/package.html | 2 +- .../org/apache/lucene/search/package.html | 2 +- .../lucene/search/similarities/package.html | 2 +- 13 files changed, 62 insertions(+), 27 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java index 839aa02a33d..5038de19bb6 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java @@ -37,10 +37,7 @@ import org.apache.lucene.util.PriorityQueue; // prototype streaming DV api public abstract class DocValuesConsumer implements Closeable { - // TODO: are any of these params too "infringing" on codec? - // we want codec to get necessary stuff from IW, but trading off against merge complexity. - // nocommit should we pass SegmentWriteState...? public abstract void addNumericField(FieldInfo field, Iterable values) throws IOException; public abstract void addBinaryField(FieldInfo field, Iterable values) throws IOException; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html index 659a6259a44..d20037b190a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html @@ -363,11 +363,11 @@ file, previously they were stored in text format only. frequencies.
  • In version 4.0, the format of the inverted index became extensible via the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage -({@link org.apache.lucene.index.DocValues DocValues}) was introduced. Normalization -factors need no longer be a single byte, they can be any DocValues -{@link org.apache.lucene.index.DocValues.Type type}. Terms need not be unicode -strings, they can be any byte sequence. Term offsets can optionally be indexed -into the postings lists. Payloads can be stored in the term vectors.
  • +({@code DocValues}) was introduced. Normalization factors need no longer be a +single byte, they can be any {@link org.apache.lucene.index.NumericDocValues NumericDocValues}. +Terms need not be unicode strings, they can be any byte sequence. Term offsets +can optionally be indexed into the postings lists. Payloads can be stored in the +term vectors.

    Limitations

    diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html index b2ef09ab47f..6a8a5b1b979 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html @@ -368,11 +368,11 @@ file, previously they were stored in text format only. frequencies.
  • In version 4.0, the format of the inverted index became extensible via the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage -({@link org.apache.lucene.index.DocValues DocValues}) was introduced. Normalization -factors need no longer be a single byte, they can be any DocValues -{@link org.apache.lucene.index.DocValues.Type type}. Terms need not be unicode -strings, they can be any byte sequence. Term offsets can optionally be indexed -into the postings lists. Payloads can be stored in the term vectors.
  • +({@code DocValues}) was introduced. Normalization factors need no longer be a +single byte, they can be any {@link org.apache.lucene.index.NumericDocValues NumericDocValues}. +Terms need not be unicode strings, they can be any byte sequence. Term offsets +can optionally be indexed into the postings lists. Payloads can be stored in the +term vectors.
  • In version 4.1, the format of the postings list changed to use either of FOR compression or variable-byte encoding, depending upon the frequency of the term.
  • diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html index 3418ba0e9ff..b49aa177ece 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html @@ -368,11 +368,11 @@ file, previously they were stored in text format only. frequencies.
  • In version 4.0, the format of the inverted index became extensible via the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage -({@link org.apache.lucene.index.DocValues DocValues}) was introduced. Normalization -factors need no longer be a single byte, they can be any DocValues -{@link org.apache.lucene.index.DocValues.Type type}. Terms need not be unicode -strings, they can be any byte sequence. Term offsets can optionally be indexed -into the postings lists. Payloads can be stored in the term vectors.
  • +({@code DocValues}) was introduced. Normalization factors need no longer be a +single byte, they can be any {@link org.apache.lucene.index.NumericDocValues NumericDocValues}. +Terms need not be unicode strings, they can be any byte sequence. Term offsets +can optionally be indexed into the postings lists. Payloads can be stored in the +term vectors.
  • In version 4.1, the format of the postings list changed to use either of FOR compression or variable-byte encoding, depending upon the frequency of the term.
  • diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java index 9e678803047..52f8a4b7eec 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java @@ -182,9 +182,6 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat { } } - // nocommit what if SimpleNormsFormat wants to use this - // ...? we have a "boolean isNorms" issue...? I guess we - // just need to make a PerFieldNormsFormat? private class FieldsReader extends DocValuesProducer { private final Map fields = new TreeMap(); diff --git a/lucene/core/src/java/org/apache/lucene/document/FieldType.java b/lucene/core/src/java/org/apache/lucene/document/FieldType.java index c0a2b9b64bf..b171faf17f5 100644 --- a/lucene/core/src/java/org/apache/lucene/document/FieldType.java +++ b/lucene/core/src/java/org/apache/lucene/document/FieldType.java @@ -416,7 +416,7 @@ public class FieldType implements IndexableFieldType { * {@inheritDoc} *

    * The default is null (no docValues) - * @see #setDocValueType(DocValuesType) + * @see #setDocValueType(org.apache.lucene.index.FieldInfo.DocValuesType) */ @Override public DocValuesType docValueType() { diff --git a/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java index 784284e9c8f..e545df47fb0 100644 --- a/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java @@ -175,10 +175,10 @@ public abstract class AtomicReader extends IndexReader { * used by a single thread. */ public abstract SortedDocValues getSortedDocValues(String field) throws IOException; - // nocommit document that these are thread-private: /** Returns {@link NumericDocValues} representing norms * for this field, or null if no {@link NumericDocValues} - * were indexed. */ + * were indexed. The returned instance should only be + * used by a single thread. */ public abstract NumericDocValues getNormValues(String field) throws IOException; /** diff --git a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java index 451df68e657..f18272e8ade 100644 --- a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java @@ -19,6 +19,9 @@ package org.apache.lucene.index; import org.apache.lucene.util.BytesRef; +/** + * A per-document byte[] + */ public abstract class BinaryDocValues { /** Lookup the value for document. @@ -29,8 +32,12 @@ public abstract class BinaryDocValues { * "private" instance should be used for each source. */ public abstract void get(int docID, BytesRef result); + /** + * Indicates the value was missing for the document. + */ public static final byte[] MISSING = new byte[0]; + /** An empty BinaryDocValues which returns empty bytes for every document */ public static final BinaryDocValues EMPTY = new BinaryDocValues() { @Override public void get(int docID, BytesRef result) { diff --git a/lucene/core/src/java/org/apache/lucene/index/NumericDocValues.java b/lucene/core/src/java/org/apache/lucene/index/NumericDocValues.java index 6480510e02f..c5fbe30450f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/NumericDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/NumericDocValues.java @@ -17,9 +17,18 @@ package org.apache.lucene.index; * limitations under the License. */ +/** + * A per-document numeric value. + */ public abstract class NumericDocValues { + /** + * Returns the numeric value for the specified document ID. + * @param docID document ID to lookup + * @return numeric value + */ public abstract long get(int docID); + /** An empty NumericDocValues which returns zero for every document */ public static final NumericDocValues EMPTY = new NumericDocValues() { @Override public long get(int docID) { diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java index 50fb75ebee5..95059f9da56 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java @@ -19,11 +19,35 @@ package org.apache.lucene.index; import org.apache.lucene.util.BytesRef; +/** + * A per-document byte[] with presorted values. + *

    + * Per-Document values in a SortedDocValues are deduplicated, dereferenced, + * and sorted into a dictionary of unique values. A pointer to the + * dictionary value (ordinal) can be retrieved for each document. Ordinals + * are dense and in increasing sorted order. + */ public abstract class SortedDocValues extends BinaryDocValues { + /** + * Returns the ordinal for the specified docID. + * @param docID document ID to lookup + * @return ordinal for the document: this is dense, starts at 0, then + * increments by 1 for the next value in sorted order. + */ public abstract int getOrd(int docID); + /** Retrieves the value for the specified ordinal. + * @param ord ordinal to lookup + * @param result will be populated with the ordinal's value + * @see #getOrd(int) + */ public abstract void lookupOrd(int ord, BytesRef result); + /** + * Returns the number of unique values. + * @return number of unique values in this SortedDocValues. This is + * also equivalent to one plus the maximum ordinal. + */ public abstract int getValueCount(); @Override @@ -37,6 +61,7 @@ public abstract class SortedDocValues extends BinaryDocValues { } } + /** An empty SortedDocValues which returns empty bytes for every document */ public static final SortedDocValues EMPTY = new SortedDocValues() { @Override public int getOrd(int docID) { diff --git a/lucene/core/src/java/org/apache/lucene/index/package.html b/lucene/core/src/java/org/apache/lucene/index/package.html index d4db08459f8..6870e08e479 100644 --- a/lucene/core/src/java/org/apache/lucene/index/package.html +++ b/lucene/core/src/java/org/apache/lucene/index/package.html @@ -254,7 +254,7 @@ its {@link org.apache.lucene.search.similarities.Similarity#computeNorm} method.

    Additional user-supplied statistics can be added to the document as DocValues fields and -accessed via {@link org.apache.lucene.index.AtomicReader#docValues}. +accessed via {@link org.apache.lucene.index.AtomicReader#getNumericDocValues}.

    diff --git a/lucene/core/src/java/org/apache/lucene/search/package.html b/lucene/core/src/java/org/apache/lucene/search/package.html index 52817bd2e42..53ebf8743fc 100644 --- a/lucene/core/src/java/org/apache/lucene/search/package.html +++ b/lucene/core/src/java/org/apache/lucene/search/package.html @@ -338,7 +338,7 @@ extend by plugging in a different component (e.g. term frequency normalizer). Finally, you can extend the low level {@link org.apache.lucene.search.similarities.Similarity Similarity} directly to implement a new retrieval model, or to use external scoring factors particular to your application. For example, a custom Similarity can access per-document values via {@link org.apache.lucene.search.FieldCache FieldCache} or -{@link org.apache.lucene.index.DocValues} and integrate them into the score. +{@link org.apache.lucene.index.NumericDocValues} and integrate them into the score.

    See the {@link org.apache.lucene.search.similarities} package documentation for information diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/package.html b/lucene/core/src/java/org/apache/lucene/search/similarities/package.html index 491255ef21a..bc235e5c54e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/package.html +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/package.html @@ -132,7 +132,7 @@ subclassing the Similarity, one can simply introduce a new basic model and tell matching term occurs. In these cases people have overridden Similarity to return 1 from the tf() method.

  • Changing Length Normalization — By overriding - {@link org.apache.lucene.search.similarities.Similarity#computeNorm(FieldInvertState state, Norm)}, + {@link org.apache.lucene.search.similarities.Similarity#computeNorm(FieldInvertState state)}, it is possible to discount how the length of a field contributes to a score. In {@link org.apache.lucene.search.similarities.DefaultSimilarity}, lengthNorm = 1 / (numTerms in field)^0.5, but if one changes this to be