git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1438647 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-01-25 18:57:59 +00:00
parent 86e30c7f7f
commit 9cbdc18447
4 changed files with 76 additions and 6 deletions

View File

@ -29,22 +29,67 @@ import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.PriorityQueue;
// prototype streaming DV api
/**
* Abstract API that consumes numeric, binary and
* sorted docvalues. Concrete implementations of this
* actually do "something" with the docvalues (write it into
* the index in a specific format).
* <p>
* The lifecycle is:
* <ol>
* <li>DocValuesConsumer is created by
* {@link DocValuesFormat#fieldsConsumer(SegmentWriteState)} or
* {@link NormsFormat#normsConsumer(SegmentWriteState)}.
* <li>{@link #addNumericField}, {@link #addBinaryField},
* or {@link #addSortedField} are called for each Numeric,
* Binary, or Sorted docvalues field. The API is "pull" rather
* than "push": the implementation is free to iterate over the
* values multiple times ({@link Iterable#iterator()}).
* <li>After all fields are added, the consumer is {@link #close}d.
* </ol>
*
* @lucene.experimental
*/
public abstract class DocValuesConsumer implements Closeable {
/**
* Writes numeric docvalues for a field.
* <p>
* The values are pulled from the supplied Iterable; an implementation
* may call {@link Iterable#iterator()} more than once if it needs
* several passes over the values.
* @param field field information
* @param values Iterable of numeric values (one for each document).
* @throws IOException if an I/O error occurred.
*/
public abstract void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException;
/**
* Writes binary docvalues for a field.
* <p>
* The values are pulled from the supplied Iterable; an implementation
* may call {@link Iterable#iterator()} more than once if it needs
* several passes over the values.
* @param field field information
* @param values Iterable of binary values (one for each document).
* @throws IOException if an I/O error occurred.
*/
public abstract void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException;
/**
* Writes pre-sorted binary docvalues for a field.
* <p>
* Both Iterables are pulled by the implementation, which may call
* {@link Iterable#iterator()} on either of them more than once.
* @param field field information
* @param values Iterable of binary values in sorted order (deduplicated).
* @param docToOrd Iterable of ordinals (one for each document).
*        NOTE(review): presumably each ordinal is the position of that
*        document's value within <code>values</code> -- confirm against callers.
* @throws IOException if an I/O error occurred.
*/
public abstract void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException;
// dead simple impl: codec can optimize
/**
* Merges the numeric docvalues from <code>toMerge</code>.
* <p>
* The default implementation calls {@link #addNumericField}, passing
* an Iterable that merges and filters deleted documents on the fly.
*/
public void mergeNumericField(FieldInfo fieldInfo, final MergeState mergeState, final List<NumericDocValues> toMerge) throws IOException {
addNumericField(fieldInfo,
@ -113,7 +158,12 @@ public abstract class DocValuesConsumer implements Closeable {
});
}
// dead simple impl: codec can optimize
/**
* Merges the binary docvalues from <code>toMerge</code>.
* <p>
* The default implementation calls {@link #addBinaryField}, passing
* an Iterable that merges and filters deleted documents on the fly.
*/
public void mergeBinaryField(FieldInfo fieldInfo, final MergeState mergeState, final List<BinaryDocValues> toMerge) throws IOException {
addBinaryField(fieldInfo,
@ -319,6 +369,12 @@ public abstract class DocValuesConsumer implements Closeable {
*/
}
/**
* Merges the sorted docvalues from <code>toMerge</code>.
* <p>
* The default implementation calls {@link #addSortedField}, passing
* an Iterable that merges ordinals and values and filters deleted documents.
*/
public void mergeSortedField(FieldInfo fieldInfo, final MergeState mergeState, List<SortedDocValues> toMerge) throws IOException {
final SortedBytesMerger merger = new SortedBytesMerger();

View File

@ -18,12 +18,26 @@ package org.apache.lucene.codecs;
*/
import java.io.IOException;
import java.util.ServiceLoader;
import java.util.Set;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.NamedSPILoader;
/**
* Encodes/decodes per-document values.
* <p>
* Note, when extending this class, the name ({@link #getName}) may be
* written into the index in certain configurations. In order for the segment
* to be read, the name must resolve to your implementation via {@link #forName(String)}.
* This method uses Java's
* {@link ServiceLoader Service Provider Interface} (SPI) to resolve format names.
* <p>
* If you implement your own format, make sure that it has a no-arg constructor
* so SPI can load it.
* @see ServiceLoader
* @lucene.experimental */
public abstract class DocValuesFormat implements NamedSPILoader.NamedSPI {
private static final NamedSPILoader<DocValuesFormat> loader =

View File

@ -23,7 +23,7 @@ import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
/**
* format for normalization factors
* Encodes/decodes per-document score normalization values.
*/
public abstract class NormsFormat {
/** Sole constructor. (For invocation by subclass

View File

@ -143,7 +143,7 @@ its numbering.</p>
what files it uses,
</li>
<li>
{@link org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat Field names}.
{@link org.apache.lucene.codecs.lucene42.Lucene42FieldInfosFormat Field names}.
This contains the set of field names used in the index.
</li>
<li>
@ -249,7 +249,7 @@ file.</td>
systems that frequently run out of file handles.</td>
</tr>
<tr>
<td>{@link org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat Fields}</td>
<td>{@link org.apache.lucene.codecs.lucene42.Lucene42FieldInfosFormat Fields}</td>
<td>.fnm</td>
<td>Stores information about the fields</td>
</tr>