javadocs

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1438847 13f79535-47bb-0310-9956-ffa450edef68
2013-01-26 04:47:29 +00:00 · 2013-01-26 04:47:29 +00:00 · 710a1ca160
parent e9b5edc750
commit 710a1ca160
21 changed files with 163 additions and 50 deletions
--- a/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationDocValuesField.java
+++ b/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationDocValuesField.java
@ -19,13 +19,20 @@ package org.apache.lucene.collation;

 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.search.FieldCacheRangeFilter;
 import org.apache.lucene.util.BytesRef;

 import com.ibm.icu.text.Collator;
 import com.ibm.icu.text.RawCollationKey;

 /**
- * nocommit
+ * Indexes collation keys as a single-valued {@link SortedDocValuesField}.
+ * <p>
+ * This is more efficient that {@link ICUCollationKeyAnalyzer} if the field 
+ * only has one value: no uninversion is necessary to sort on the field, 
+ * locale-sensitive range queries can still work via {@link FieldCacheRangeFilter}, 
+ * and the underlying data structures built at index-time are likely more efficient 
+ * and use less memory than FieldCache.
 */
 public final class ICUCollationDocValuesField extends Field {
  private final String name;
@ -33,6 +40,17 @@ public final class ICUCollationDocValuesField extends Field {
  private final BytesRef bytes = new BytesRef();
  private final RawCollationKey key = new RawCollationKey();
  
+  /**
+   * Create a new ICUCollationDocValuesField.
+   * <p>
+   * NOTE: you should not create a new one for each document, instead
+   * just make one and reuse it during your indexing process, setting
+   * the value via {@link #setStringValue(String)}.
+   * @param name field name
+   * @param collator Collator for generating collation keys.
+   */
+  // TODO: can we make this trap-free? maybe just synchronize on the collator
+  // instead? 
  public ICUCollationDocValuesField(String name, Collator collator) {
    super(name, SortedDocValuesField.TYPE);
    this.name = name;
@ -48,6 +66,7 @@ public final class ICUCollationDocValuesField extends Field {
    return name;
  }
  
+  @Override
  public void setStringValue(String value) {
    collator.getRawCollationKey(value, key);
    bytes.bytes = key.bytes;
@ -60,5 +79,5 @@ public final class ICUCollationDocValuesField extends Field {
    return bytes;
  }
  
-  // nocommit: make this thing trap-free
+  // nocommit: UOE the other field methods? or set to empty bytesref initially so this just works...
 }
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
@ -58,6 +58,10 @@ import org.apache.lucene.util.PriorityQueue;
 * @lucene.experimental
 */
 public abstract class DocValuesConsumer implements Closeable {
+  
+  /** Sole constructor. (For invocation by subclass 
+   *  constructors, typically implicit.) */
+  protected DocValuesConsumer() {}

  /**
   * Writes numeric docvalues for a field.
@ -232,7 +236,7 @@ public abstract class DocValuesConsumer implements Closeable {
                   });
  }

-  public static class SortedBytesMerger {
+  static class SortedBytesMerger {

    public int numMergedTerms;

--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesFormat.java
@ -62,8 +62,20 @@ public abstract class DocValuesFormat implements NamedSPILoader.NamedSPI {
    this.name = name;
  }

+  /** Returns a {@link DocValuesConsumer} to write docvalues to the
+   *  index. */
  public abstract DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException;

+  /** 
+   * Returns a {@link DocValuesProducer} to read docvalues from the index. 
+   * <p>
+   * NOTE: by the time this call returns, it must hold open any files it will 
+   * need to use; else, those files may be deleted. Additionally, required files 
+   * may be deleted during the execution of this call before there is a chance 
+   * to open them. Under these circumstances an IOException should be thrown by 
+   * the implementation. IOExceptions are expected and will automatically cause 
+   * a retry of the segment opening logic with the newly revised segments.
+   */
  public abstract DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException;

  @Override
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java
@ -25,16 +25,29 @@ import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SortedDocValues;

-// nocommit add javadocs stating that this must open all
-// necessary files "on init", not later eg in .getXXX, else
-// an IW that deletes a commit will cause an SR to hit
-// exceptions....
-
+/** Abstract API that produces numeric, binary and
+ * sorted docvalues.
+ *
+ * @lucene.experimental
+ */
 public abstract class DocValuesProducer implements Closeable {
+  
+  /** Sole constructor. (For invocation by subclass 
+   *  constructors, typically implicit.) */
+  protected DocValuesProducer() {}

+  /** Returns {@link NumericDocValues} for this field.
+   *  The returned instance need not be thread-safe: it will only be
+   *  used by a single thread. */
  public abstract NumericDocValues getNumeric(FieldInfo field) throws IOException;

+  /** Returns {@link BinaryDocValues} for this field.
+   *  The returned instance need not be thread-safe: it will only be
+   *  used by a single thread. */
  public abstract BinaryDocValues getBinary(FieldInfo field) throws IOException;

+  /** Returns {@link SortedDocValues} for this field.
+   *  The returned instance need not be thread-safe: it will only be
+   *  used by a single thread. */
  public abstract SortedDocValues getSorted(FieldInfo field) throws IOException;
 }
--- a/lucene/core/src/java/org/apache/lucene/codecs/NormsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/NormsFormat.java
@ -35,7 +35,15 @@ public abstract class NormsFormat {
   *  index. */
  public abstract DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException;

-  /** Returns a {@link DocValuesProducer} to read norms from the
-   *  index. */
+  /** 
+   * Returns a {@link DocValuesProducer} to read norms from the index. 
+   * <p>
+   * NOTE: by the time this call returns, it must hold open any files it will 
+   * need to use; else, those files may be deleted. Additionally, required files 
+   * may be deleted during the execution of this call before there is a chance 
+   * to open them. Under these circumstances an IOException should be thrown by 
+   * the implementation. IOExceptions are expected and will automatically cause 
+   * a retry of the segment opening logic with the newly revised segments.
+   */
  public abstract DocValuesProducer normsProducer(SegmentReadState state) throws IOException;
 }
--- a/lucene/core/src/java/org/apache/lucene/document/BinaryDocValuesField.java
+++ b/lucene/core/src/java/org/apache/lucene/document/BinaryDocValuesField.java
@ -1,8 +1,5 @@
 package org.apache.lucene.document;

-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.util.BytesRef;
-
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
@ -20,14 +17,45 @@ import org.apache.lucene.util.BytesRef;
 * limitations under the License.
 */

+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Field that stores a per-document {@link BytesRef} value.  
+ * <p>
+ * The values are stored directly with no sharing, which is a good fit when
+ * the fields don't share (many) values, such as a title field.  If values 
+ * may be shared and sorted it's better to use {@link SortedDocValuesField}.  
+ * Here's an example usage:
+ * 
+ * <pre class="prettyprint">
+ *   document.add(new BinaryDocValuesField(name, new BytesRef("hello")));
+ * </pre>
+ * 
+ * <p>
+ * If you also need to store the value, you should add a
+ * separate {@link StoredField} instance.
+ * 
+ * @see BinaryDocValues
+ * */
 public class BinaryDocValuesField extends StoredField {
  
+  /**
+   * Type for straight bytes DocValues.
+   */
  public static final FieldType TYPE = new FieldType();
  static {
    TYPE.setDocValueType(FieldInfo.DocValuesType.BINARY);
    TYPE.freeze();
  }
  
+  /**
+   * Create a new binary DocValues field.
+   * @param name field name
+   * @param value binary content
+   * @throws IllegalArgumentException if the field name is null
+   */
  public BinaryDocValuesField(String name, BytesRef value) {
    super(name, TYPE);
    fieldsData = value;
--- a/lucene/core/src/java/org/apache/lucene/document/DoubleDocValuesField.java
+++ b/lucene/core/src/java/org/apache/lucene/document/DoubleDocValuesField.java
@ -1,8 +1,5 @@
 package org.apache.lucene.document;

-import org.apache.lucene.index.AtomicReader; // javadocs
-import org.apache.lucene.search.FieldCache; // javadocs
-
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
@ -20,6 +17,9 @@ import org.apache.lucene.search.FieldCache; // javadocs
 * limitations under the License.
 */

+import org.apache.lucene.index.AtomicReader; // javadocs
+import org.apache.lucene.search.FieldCache; // javadocs
+
 /**
 * Syntactic sugar for encoding doubles as NumericDocValues
 * via {@link Double#doubleToRawLongBits(double)}.
@ -33,6 +33,12 @@ import org.apache.lucene.search.FieldCache; // javadocs
 */
 public class DoubleDocValuesField extends NumericDocValuesField {

+  /** 
+   * Creates a new DocValues field with the specified 64-bit double value 
+   * @param name field name
+   * @param value 64-bit double value
+   * @throws IllegalArgumentException if the field name is null
+   */
  public DoubleDocValuesField(String name, double value) {
    super(name, Double.doubleToRawLongBits(value));
  }
--- a/lucene/core/src/java/org/apache/lucene/document/FloatDocValuesField.java
+++ b/lucene/core/src/java/org/apache/lucene/document/FloatDocValuesField.java
@ -1,8 +1,5 @@
 package org.apache.lucene.document;

-import org.apache.lucene.index.AtomicReader; // javadocs
-import org.apache.lucene.search.FieldCache; // javadocs
-
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
@ -20,6 +17,9 @@ import org.apache.lucene.search.FieldCache; // javadocs
 * limitations under the License.
 */

+import org.apache.lucene.index.AtomicReader; // javadocs
+import org.apache.lucene.search.FieldCache; // javadocs
+
 /**
 * Syntactic sugar for encoding floats as NumericDocValues
 * via {@link Float#floatToRawIntBits(float)}.
@ -33,6 +33,12 @@ import org.apache.lucene.search.FieldCache; // javadocs
 */
 public class FloatDocValuesField extends NumericDocValuesField {

+  /** 
+   * Creates a new DocValues field with the specified 32-bit float value 
+   * @param name field name
+   * @param value 32-bit float value
+   * @throws IllegalArgumentException if the field name is null
+   */
  public FloatDocValuesField(String name, float value) {
    super(name, Float.floatToRawIntBits(value));
  }
--- a/lucene/core/src/java/org/apache/lucene/document/NumericDocValuesField.java
+++ b/lucene/core/src/java/org/apache/lucene/document/NumericDocValuesField.java
@ -25,7 +25,7 @@ import org.apache.lucene.index.FieldInfo;
 * sorting or value retrieval. Here's an example usage:
 * 
 * <pre class="prettyprint">
- *   document.add(new LongDocValuesField(name, 22L));
+ *   document.add(new NumericDocValuesField(name, 22L));
 * </pre>
 * 
 * <p>
--- a/lucene/core/src/java/org/apache/lucene/document/SortedDocValuesField.java
+++ b/lucene/core/src/java/org/apache/lucene/document/SortedDocValuesField.java
@ -27,7 +27,7 @@ import org.apache.lucene.util.BytesRef;
 * sorting.  Here's an example usage:
 * 
 * <pre class="prettyprint">
- *   document.add(new SortedBytesDocValuesField(name, new BytesRef("hello")));
+ *   document.add(new SortedDocValuesField(name, new BytesRef("hello")));
 * </pre>
 * 
 * <p>
--- a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java
@ -23,6 +23,10 @@ import org.apache.lucene.util.BytesRef;
 * A per-document byte[]
 */
 public abstract class BinaryDocValues {
+  
+  /** Sole constructor. (For invocation by subclass 
+   * constructors, typically implicit.) */
+  protected BinaryDocValues() {}

  /** Lookup the value for document.
   *
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@ -1257,6 +1257,10 @@ public class CheckIndex {
    return status;
  }
  
+  /**
+   * Test docvalues.
+   * @lucene.experimental
+   */
  public static Status.DocValuesStatus testDocValues(AtomicReader reader,
                                                     PrintStream infoStream) {
    final Status.DocValuesStatus status = new Status.DocValuesStatus();
--- a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
@ -48,6 +48,9 @@ import org.apache.lucene.util.Version;
 // nocommit move this back to test-framework!!!
 public class MultiDocValues {
  
+  /** No instantiation */
+  private MultiDocValues() {}
+  
  /** returns a NumericDocValues for a reader's norms (potentially merging on-the-fly) */
  // moved to src/java so SlowWrapper can use it... uggggggh
  public static NumericDocValues getNormValues(final IndexReader r, final String field) throws IOException {
--- a/lucene/core/src/java/org/apache/lucene/index/NumericDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/NumericDocValues.java
@ -21,6 +21,11 @@ package org.apache.lucene.index;
 * A per-document numeric value.
 */
 public abstract class NumericDocValues {
+  
+  /** Sole constructor. (For invocation by subclass 
+   * constructors, typically implicit.) */
+  protected NumericDocValues() {}
+
  /**
   * Returns the numeric value for the specified document ID.
   * @param docID document ID to lookup
--- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
@ -28,6 +28,11 @@ import org.apache.lucene.util.BytesRef;
 * are dense and in increasing sorted order.
 */
 public abstract class SortedDocValues extends BinaryDocValues {
+  
+  /** Sole constructor. (For invocation by subclass 
+   * constructors, typically implicit.) */
+  protected SortedDocValues() {}
+
  /**
   * Returns the ordinal for the specified docID.
   * @param  docID document ID to lookup
--- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
@ -31,6 +31,7 @@ public class SortedDocValuesTermsEnum extends TermsEnum {
  private int currentOrd = -1;
  private final BytesRef term = new BytesRef();

+  /** Creates a new TermsEnum over the provided values */
  public SortedDocValuesTermsEnum(SortedDocValues values) {
    this.values = values;
  }
--- a/lucene/core/src/java/org/apache/lucene/search/FieldCache.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FieldCache.java
@ -46,27 +46,39 @@ import org.apache.lucene.util.RamUsageEstimator;
 */
 public interface FieldCache {

+  /** Field values as 8-bit signed bytes */
  public static abstract class Bytes {
+    /** Return a single Byte representation of this field's value. */
    public abstract byte get(int docID);
  }

+  /** Field values as 16-bit signed shorts */
  public static abstract class Shorts {
+    /** Return a short representation of this field's value. */
    public abstract short get(int docID);
  }

+  /** Field values as 32-bit signed integers */
  public static abstract class Ints {
+    /** Return an integer representation of this field's value. */
    public abstract int get(int docID);
  }

+  /** Field values as 32-bit signed long integers */
  public static abstract class Longs {
+    /** Return an long representation of this field's value. */
    public abstract long get(int docID);
  }

+  /** Field values as 32-bit floats */
  public static abstract class Floats {
+    /** Return an float representation of this field's value. */
    public abstract float get(int docID);
  }

+  /** Field values as 64-bit doubles */
  public static abstract class Doubles {
+    /** Return an double representation of this field's value. */
    public abstract double get(int docID);
  }

@ -138,7 +150,7 @@ public interface FieldCache {
   * @see FieldCache#getDoubles(AtomicReader, String, FieldCache.DoubleParser, boolean)
   */
  public interface DoubleParser extends Parser {
-    /** Return an long representation of this field's value. */
+    /** Return an double representation of this field's value. */
    public double parseDouble(BytesRef term);
  }

--- a/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
+++ b/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
@ -1184,6 +1184,7 @@ public class PackedInts {
    return new Header(format, valueCount, bitsPerValue, version);
  }
  
+  /** Header identifying the structure of a packed integer array. */
  public static class Header {

    private final Format format;
--- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/package.html
+++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/package.html
@ -80,8 +80,8 @@ field fall into a single group.</p>
 <p>Known limitations:</p>
 <ul>
  <li> For the two-pass grouping search, the group field must be a
-    single-valued indexed field.
-    {@link org.apache.lucene.search.FieldCache} is used to load the {@link org.apache.lucene.search.FieldCache.DocTermsIndex} for this field.
+    single-valued indexed field (or indexed as a {@link org.apache.lucene.document.SortedDocValuesField}).
+    {@link org.apache.lucene.search.FieldCache} is used to load the {@link org.apache.lucene.index.SortedDocValues} for this field.
  <li> Although Solr support grouping by function and this module has abstraction of what a group is, there are currently only
    implementations for grouping based on terms.
  <li> Sharding is not directly supported, though is not too
@ -196,25 +196,5 @@ fields, <code>FieldCache</code>, etc.).
  <code>GroupingSearch</code> convenience utility
 </p>

-<p>
-  There are also DocValues based implementations available for the group collectors. There are factory methods
-  available for creating dv based instances. A typical example using dv based grouping with the
-  <code>GroupingSearch</code> convenience utility:
-</p>
-
-<pre class="prettyprint">
-  boolean diskResident = true; // Whether values should fetched directly from disk by passing the Java heap space.
-  DocValues.Type docValuesType = DocValues.Type.BYTES_VAR_SORTED;
-  GroupingSearch groupingSearch = new GroupingSearch("author", docValuesType, diskResident);
-  groupingSearch.setGroupSort(groupSort);
-  groupingSearch.setFillSortFields(fillFields);
-
-  TermQuery query = new TermQuery(new Term("content", searchTerm));
-  // The docValuesType variable decides the generic type. When float is used this Double and in case of int this is Long
-  TopGroups&lt;BytesRef&gt; result = groupingSearch.search(indexSearcher, query, groupOffset, groupLimit);
-
-  // Render groupsResult...
-</pre>
-
 </body>
 </html>
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWDocValuesFormat.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWDocValuesFormat.java
@ -23,6 +23,7 @@ import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;

+/** Read-write version of {@link Lucene40DocValuesFormat} for testing */
 public class Lucene40RWDocValuesFormat extends Lucene40DocValuesFormat {

  @Override
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWNormsFormat.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWNormsFormat.java
@ -1,11 +1,5 @@
 package org.apache.lucene.codecs.lucene40;

-import java.io.IOException;
-
-import org.apache.lucene.codecs.DocValuesConsumer;
-import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.SegmentWriteState;
-
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
@ -23,6 +17,13 @@ import org.apache.lucene.index.SegmentWriteState;
 * limitations under the License.
 */

+import java.io.IOException;
+
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+
+/** Read-write version of {@link Lucene40NormsFormat} for testing */
 public class Lucene40RWNormsFormat extends Lucene40NormsFormat {

  @Override