From 710a1ca16092c02a5ae8b751ebd2eaad1c14ecd3 Mon Sep 17 00:00:00 2001
From: Robert Muir
+ * This is more efficient that {@link ICUCollationKeyAnalyzer} if the field
+ * only has one value: no uninversion is necessary to sort on the field,
+ * locale-sensitive range queries can still work via {@link FieldCacheRangeFilter},
+ * and the underlying data structures built at index-time are likely more efficient
+ * and use less memory than FieldCache.
*/
public final class ICUCollationDocValuesField extends Field {
private final String name;
@@ -33,6 +40,17 @@ public final class ICUCollationDocValuesField extends Field {
private final BytesRef bytes = new BytesRef();
private final RawCollationKey key = new RawCollationKey();
+ /**
+ * Create a new ICUCollationDocValuesField.
+ *
+ * NOTE: you should not create a new one for each document, instead
+ * just make one and reuse it during your indexing process, setting
+ * the value via {@link #setStringValue(String)}.
+ * @param name field name
+ * @param collator Collator for generating collation keys.
+ */
+ // TODO: can we make this trap-free? maybe just synchronize on the collator
+ // instead?
public ICUCollationDocValuesField(String name, Collator collator) {
super(name, SortedDocValuesField.TYPE);
this.name = name;
@@ -48,6 +66,7 @@ public final class ICUCollationDocValuesField extends Field {
return name;
}
+ @Override
public void setStringValue(String value) {
collator.getRawCollationKey(value, key);
bytes.bytes = key.bytes;
@@ -60,5 +79,5 @@ public final class ICUCollationDocValuesField extends Field {
return bytes;
}
- // nocommit: make this thing trap-free
+ // nocommit: UOE the other field methods? or set to empty bytesref initially so this just works...
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
index 06963b834a3..9dd4f820560 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
@@ -58,6 +58,10 @@ import org.apache.lucene.util.PriorityQueue;
* @lucene.experimental
*/
public abstract class DocValuesConsumer implements Closeable {
+
+ /** Sole constructor. (For invocation by subclass
+ * constructors, typically implicit.) */
+ protected DocValuesConsumer() {}
/**
* Writes numeric docvalues for a field.
@@ -232,7 +236,7 @@ public abstract class DocValuesConsumer implements Closeable {
});
}
- public static class SortedBytesMerger {
+ static class SortedBytesMerger {
public int numMergedTerms;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesFormat.java
index a6803e73439..2e47bb101c7 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesFormat.java
@@ -62,8 +62,20 @@ public abstract class DocValuesFormat implements NamedSPILoader.NamedSPI {
this.name = name;
}
+ /** Returns a {@link DocValuesConsumer} to write docvalues to the
+ * index. */
public abstract DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException;
+ /**
+ * Returns a {@link DocValuesProducer} to read docvalues from the index.
+ *
+ * NOTE: by the time this call returns, it must hold open any files it will
+ * need to use; else, those files may be deleted. Additionally, required files
+ * may be deleted during the execution of this call before there is a chance
+ * to open them. Under these circumstances an IOException should be thrown by
+ * the implementation. IOExceptions are expected and will automatically cause
+ * a retry of the segment opening logic with the newly revised segments.
+ */
public abstract DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException;
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java
index 50fcd57c223..d34c51d10e7 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java
@@ -25,16 +25,29 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
-// nocommit add javadocs stating that this must open all
-// necessary files "on init", not later eg in .getXXX, else
-// an IW that deletes a commit will cause an SR to hit
-// exceptions....
-
+/** Abstract API that produces numeric, binary and
+ * sorted docvalues.
+ *
+ * @lucene.experimental
+ */
public abstract class DocValuesProducer implements Closeable {
+
+ /** Sole constructor. (For invocation by subclass
+ * constructors, typically implicit.) */
+ protected DocValuesProducer() {}
+ /** Returns {@link NumericDocValues} for this field.
+ * The returned instance need not be thread-safe: it will only be
+ * used by a single thread. */
public abstract NumericDocValues getNumeric(FieldInfo field) throws IOException;
+ /** Returns {@link BinaryDocValues} for this field.
+ * The returned instance need not be thread-safe: it will only be
+ * used by a single thread. */
public abstract BinaryDocValues getBinary(FieldInfo field) throws IOException;
+ /** Returns {@link SortedDocValues} for this field.
+ * The returned instance need not be thread-safe: it will only be
+ * used by a single thread. */
public abstract SortedDocValues getSorted(FieldInfo field) throws IOException;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/NormsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/NormsFormat.java
index cc88dc42aab..dca6bbf5d89 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/NormsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/NormsFormat.java
@@ -35,7 +35,15 @@ public abstract class NormsFormat {
* index. */
public abstract DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException;
- /** Returns a {@link DocValuesProducer} to read norms from the
- * index. */
+ /**
+ * Returns a {@link DocValuesProducer} to read norms from the index.
+ *
+ * NOTE: by the time this call returns, it must hold open any files it will
+ * need to use; else, those files may be deleted. Additionally, required files
+ * may be deleted during the execution of this call before there is a chance
+ * to open them. Under these circumstances an IOException should be thrown by
+ * the implementation. IOExceptions are expected and will automatically cause
+ * a retry of the segment opening logic with the newly revised segments.
+ */
public abstract DocValuesProducer normsProducer(SegmentReadState state) throws IOException;
}
diff --git a/lucene/core/src/java/org/apache/lucene/document/BinaryDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/BinaryDocValuesField.java
index 831afafb152..267af743088 100644
--- a/lucene/core/src/java/org/apache/lucene/document/BinaryDocValuesField.java
+++ b/lucene/core/src/java/org/apache/lucene/document/BinaryDocValuesField.java
@@ -1,8 +1,5 @@
package org.apache.lucene.document;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.util.BytesRef;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -20,14 +17,45 @@ import org.apache.lucene.util.BytesRef;
* limitations under the License.
*/
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Field that stores a per-document {@link BytesRef} value.
+ *
+ * The values are stored directly with no sharing, which is a good fit when
+ * the fields don't share (many) values, such as a title field. If values
+ * may be shared and sorted it's better to use {@link SortedDocValuesField}.
+ * Here's an example usage:
+ *
+ *
+ * If you also need to store the value, you should add a
+ * separate {@link StoredField} instance.
+ *
+ * @see BinaryDocValues
+ * */
public class BinaryDocValuesField extends StoredField {
+ /**
+ * Type for straight bytes DocValues.
+ */
public static final FieldType TYPE = new FieldType();
static {
TYPE.setDocValueType(FieldInfo.DocValuesType.BINARY);
TYPE.freeze();
}
+ /**
+ * Create a new binary DocValues field.
+ * @param name field name
+ * @param value binary content
+ * @throws IllegalArgumentException if the field name is null
+ */
public BinaryDocValuesField(String name, BytesRef value) {
super(name, TYPE);
fieldsData = value;
diff --git a/lucene/core/src/java/org/apache/lucene/document/DoubleDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/DoubleDocValuesField.java
index f4d173d218d..dca10430b45 100644
--- a/lucene/core/src/java/org/apache/lucene/document/DoubleDocValuesField.java
+++ b/lucene/core/src/java/org/apache/lucene/document/DoubleDocValuesField.java
@@ -1,8 +1,5 @@
package org.apache.lucene.document;
-import org.apache.lucene.index.AtomicReader; // javadocs
-import org.apache.lucene.search.FieldCache; // javadocs
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -20,6 +17,9 @@ import org.apache.lucene.search.FieldCache; // javadocs
* limitations under the License.
*/
+import org.apache.lucene.index.AtomicReader; // javadocs
+import org.apache.lucene.search.FieldCache; // javadocs
+
/**
* Syntactic sugar for encoding doubles as NumericDocValues
* via {@link Double#doubleToRawLongBits(double)}.
@@ -33,6 +33,12 @@ import org.apache.lucene.search.FieldCache; // javadocs
*/
public class DoubleDocValuesField extends NumericDocValuesField {
+ /**
+ * Creates a new DocValues field with the specified 64-bit double value
+ * @param name field name
+ * @param value 64-bit double value
+ * @throws IllegalArgumentException if the field name is null
+ */
public DoubleDocValuesField(String name, double value) {
super(name, Double.doubleToRawLongBits(value));
}
diff --git a/lucene/core/src/java/org/apache/lucene/document/FloatDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/FloatDocValuesField.java
index 2e687657826..c4635d8b396 100644
--- a/lucene/core/src/java/org/apache/lucene/document/FloatDocValuesField.java
+++ b/lucene/core/src/java/org/apache/lucene/document/FloatDocValuesField.java
@@ -1,8 +1,5 @@
package org.apache.lucene.document;
-import org.apache.lucene.index.AtomicReader; // javadocs
-import org.apache.lucene.search.FieldCache; // javadocs
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -20,6 +17,9 @@ import org.apache.lucene.search.FieldCache; // javadocs
* limitations under the License.
*/
+import org.apache.lucene.index.AtomicReader; // javadocs
+import org.apache.lucene.search.FieldCache; // javadocs
+
/**
* Syntactic sugar for encoding floats as NumericDocValues
* via {@link Float#floatToRawIntBits(float)}.
@@ -33,6 +33,12 @@ import org.apache.lucene.search.FieldCache; // javadocs
*/
public class FloatDocValuesField extends NumericDocValuesField {
+ /**
+ * Creates a new DocValues field with the specified 32-bit float value
+ * @param name field name
+ * @param value 32-bit float value
+ * @throws IllegalArgumentException if the field name is null
+ */
public FloatDocValuesField(String name, float value) {
super(name, Float.floatToRawIntBits(value));
}
diff --git a/lucene/core/src/java/org/apache/lucene/document/NumericDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/NumericDocValuesField.java
index 708926f4663..346fbee1a5b 100644
--- a/lucene/core/src/java/org/apache/lucene/document/NumericDocValuesField.java
+++ b/lucene/core/src/java/org/apache/lucene/document/NumericDocValuesField.java
@@ -25,7 +25,7 @@ import org.apache.lucene.index.FieldInfo;
* sorting or value retrieval. Here's an example usage:
*
*
diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/SortedDocValuesField.java
index b4332896c28..e8103e98eca 100644
--- a/lucene/core/src/java/org/apache/lucene/document/SortedDocValuesField.java
+++ b/lucene/core/src/java/org/apache/lucene/document/SortedDocValuesField.java
@@ -27,7 +27,7 @@ import org.apache.lucene.util.BytesRef;
* sorting. Here's an example usage:
*
*
diff --git a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java
index f18272e8ade..d31ec12bf69 100644
--- a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java
@@ -23,6 +23,10 @@ import org.apache.lucene.util.BytesRef;
* A per-document byte[]
*/
public abstract class BinaryDocValues {
+
+ /** Sole constructor. (For invocation by subclass
+ * constructors, typically implicit.) */
+ protected BinaryDocValues() {}
/** Lookup the value for document.
*
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index 23c9fb3ac6b..cfeb3b6b4f7 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -1257,6 +1257,10 @@ public class CheckIndex {
return status;
}
+ /**
+ * Test docvalues.
+ * @lucene.experimental
+ */
public static Status.DocValuesStatus testDocValues(AtomicReader reader,
PrintStream infoStream) {
final Status.DocValuesStatus status = new Status.DocValuesStatus();
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
index c13b1ce7873..360df524ffd 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
@@ -48,6 +48,9 @@ import org.apache.lucene.util.Version;
// nocommit move this back to test-framework!!!
public class MultiDocValues {
+ /** No instantiation */
+ private MultiDocValues() {}
+
/** returns a NumericDocValues for a reader's norms (potentially merging on-the-fly) */
// moved to src/java so SlowWrapper can use it... uggggggh
public static NumericDocValues getNormValues(final IndexReader r, final String field) throws IOException {
diff --git a/lucene/core/src/java/org/apache/lucene/index/NumericDocValues.java b/lucene/core/src/java/org/apache/lucene/index/NumericDocValues.java
index c5fbe30450f..8678a5bbc68 100644
--- a/lucene/core/src/java/org/apache/lucene/index/NumericDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/NumericDocValues.java
@@ -21,6 +21,11 @@ package org.apache.lucene.index;
* A per-document numeric value.
*/
public abstract class NumericDocValues {
+
+ /** Sole constructor. (For invocation by subclass
+ * constructors, typically implicit.) */
+ protected NumericDocValues() {}
+
/**
* Returns the numeric value for the specified document ID.
* @param docID document ID to lookup
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
index eddd72dbc32..e2506087e9b 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
@@ -28,6 +28,11 @@ import org.apache.lucene.util.BytesRef;
* are dense and in increasing sorted order.
*/
public abstract class SortedDocValues extends BinaryDocValues {
+
+ /** Sole constructor. (For invocation by subclass
+ * constructors, typically implicit.) */
+ protected SortedDocValues() {}
+
/**
* Returns the ordinal for the specified docID.
* @param docID document ID to lookup
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
index 0f34e3298a2..6588aaf4e39 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
@@ -31,6 +31,7 @@ public class SortedDocValuesTermsEnum extends TermsEnum {
private int currentOrd = -1;
private final BytesRef term = new BytesRef();
+ /** Creates a new TermsEnum over the provided values */
public SortedDocValuesTermsEnum(SortedDocValues values) {
this.values = values;
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCache.java b/lucene/core/src/java/org/apache/lucene/search/FieldCache.java
index b45017e6b69..fba2b6447b4 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FieldCache.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FieldCache.java
@@ -46,27 +46,39 @@ import org.apache.lucene.util.RamUsageEstimator;
*/
public interface FieldCache {
+ /** Field values as 8-bit signed bytes */
public static abstract class Bytes {
+ /** Return a single Byte representation of this field's value. */
public abstract byte get(int docID);
}
+ /** Field values as 16-bit signed shorts */
public static abstract class Shorts {
+ /** Return a short representation of this field's value. */
public abstract short get(int docID);
}
+ /** Field values as 32-bit signed integers */
public static abstract class Ints {
+ /** Return an integer representation of this field's value. */
public abstract int get(int docID);
}
+ /** Field values as 32-bit signed long integers */
public static abstract class Longs {
+ /** Return an long representation of this field's value. */
public abstract long get(int docID);
}
+ /** Field values as 32-bit floats */
public static abstract class Floats {
+ /** Return an float representation of this field's value. */
public abstract float get(int docID);
}
+ /** Field values as 64-bit doubles */
public static abstract class Doubles {
+ /** Return an double representation of this field's value. */
public abstract double get(int docID);
}
@@ -138,7 +150,7 @@ public interface FieldCache {
* @see FieldCache#getDoubles(AtomicReader, String, FieldCache.DoubleParser, boolean)
*/
public interface DoubleParser extends Parser {
- /** Return an long representation of this field's value. */
+ /** Return an double representation of this field's value. */
public double parseDouble(BytesRef term);
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java b/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
index 6d1a2ab3bab..15808e16996 100644
--- a/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
+++ b/lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
@@ -1184,6 +1184,7 @@ public class PackedInts {
return new Header(format, valueCount, bitsPerValue, version);
}
+ /** Header identifying the structure of a packed integer array. */
public static class Header {
private final Format format;
diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/package.html b/lucene/grouping/src/java/org/apache/lucene/search/grouping/package.html
index 79a8ecdd5d4..e45b66692be 100644
--- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/package.html
+++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/package.html
@@ -80,8 +80,8 @@ field fall into a single group. Known limitations:
+ * document.add(new BinaryDocValuesField(name, new BytesRef("hello")));
+ *
+ *
+ *
- * document.add(new LongDocValuesField(name, 22L));
+ * document.add(new NumericDocValuesField(name, 22L));
*
*
*
- * document.add(new SortedBytesDocValuesField(name, new BytesRef("hello")));
+ * document.add(new SortedDocValuesField(name, new BytesRef("hello")));
*
*
*
FieldCache
, etc.).
GroupingSearch
convenience utility
- There are also DocValues based implementations available for the group collectors. There are factory methods
- available for creating dv based instances. A typical example using dv based grouping with the
- GroupingSearch
convenience utility:
-
- boolean diskResident = true; // Whether values should fetched directly from disk by passing the Java heap space. - DocValues.Type docValuesType = DocValues.Type.BYTES_VAR_SORTED; - GroupingSearch groupingSearch = new GroupingSearch("author", docValuesType, diskResident); - groupingSearch.setGroupSort(groupSort); - groupingSearch.setFillSortFields(fillFields); - - TermQuery query = new TermQuery(new Term("content", searchTerm)); - // The docValuesType variable decides the generic type. When float is used this Double and in case of int this is Long - TopGroups<BytesRef> result = groupingSearch.search(indexSearcher, query, groupOffset, groupLimit); - - // Render groupsResult... --