mirror of https://github.com/apache/lucene.git
javadocs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1438847 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e9b5edc750
commit
710a1ca160
|
@ -19,13 +19,20 @@ package org.apache.lucene.collation;
|
|||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.search.FieldCacheRangeFilter;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
import com.ibm.icu.text.Collator;
|
||||
import com.ibm.icu.text.RawCollationKey;
|
||||
|
||||
/**
|
||||
* nocommit
|
||||
* Indexes collation keys as a single-valued {@link SortedDocValuesField}.
|
||||
* <p>
|
||||
* This is more efficient that {@link ICUCollationKeyAnalyzer} if the field
|
||||
* only has one value: no uninversion is necessary to sort on the field,
|
||||
* locale-sensitive range queries can still work via {@link FieldCacheRangeFilter},
|
||||
* and the underlying data structures built at index-time are likely more efficient
|
||||
* and use less memory than FieldCache.
|
||||
*/
|
||||
public final class ICUCollationDocValuesField extends Field {
|
||||
private final String name;
|
||||
|
@ -33,6 +40,17 @@ public final class ICUCollationDocValuesField extends Field {
|
|||
private final BytesRef bytes = new BytesRef();
|
||||
private final RawCollationKey key = new RawCollationKey();
|
||||
|
||||
/**
|
||||
* Create a new ICUCollationDocValuesField.
|
||||
* <p>
|
||||
* NOTE: you should not create a new one for each document, instead
|
||||
* just make one and reuse it during your indexing process, setting
|
||||
* the value via {@link #setStringValue(String)}.
|
||||
* @param name field name
|
||||
* @param collator Collator for generating collation keys.
|
||||
*/
|
||||
// TODO: can we make this trap-free? maybe just synchronize on the collator
|
||||
// instead?
|
||||
public ICUCollationDocValuesField(String name, Collator collator) {
|
||||
super(name, SortedDocValuesField.TYPE);
|
||||
this.name = name;
|
||||
|
@ -48,6 +66,7 @@ public final class ICUCollationDocValuesField extends Field {
|
|||
return name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setStringValue(String value) {
|
||||
collator.getRawCollationKey(value, key);
|
||||
bytes.bytes = key.bytes;
|
||||
|
@ -60,5 +79,5 @@ public final class ICUCollationDocValuesField extends Field {
|
|||
return bytes;
|
||||
}
|
||||
|
||||
// nocommit: make this thing trap-free
|
||||
// nocommit: UOE the other field methods? or set to empty bytesref initially so this just works...
|
||||
}
|
||||
|
|
|
@ -58,6 +58,10 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class DocValuesConsumer implements Closeable {
|
||||
|
||||
/** Sole constructor. (For invocation by subclass
|
||||
* constructors, typically implicit.) */
|
||||
protected DocValuesConsumer() {}
|
||||
|
||||
/**
|
||||
* Writes numeric docvalues for a field.
|
||||
|
@ -232,7 +236,7 @@ public abstract class DocValuesConsumer implements Closeable {
|
|||
});
|
||||
}
|
||||
|
||||
public static class SortedBytesMerger {
|
||||
static class SortedBytesMerger {
|
||||
|
||||
public int numMergedTerms;
|
||||
|
||||
|
|
|
@ -62,8 +62,20 @@ public abstract class DocValuesFormat implements NamedSPILoader.NamedSPI {
|
|||
this.name = name;
|
||||
}
|
||||
|
||||
/** Returns a {@link DocValuesConsumer} to write docvalues to the
|
||||
* index. */
|
||||
public abstract DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException;
|
||||
|
||||
/**
|
||||
* Returns a {@link DocValuesProducer} to read docvalues from the index.
|
||||
* <p>
|
||||
* NOTE: by the time this call returns, it must hold open any files it will
|
||||
* need to use; else, those files may be deleted. Additionally, required files
|
||||
* may be deleted during the execution of this call before there is a chance
|
||||
* to open them. Under these circumstances an IOException should be thrown by
|
||||
* the implementation. IOExceptions are expected and will automatically cause
|
||||
* a retry of the segment opening logic with the newly revised segments.
|
||||
*/
|
||||
public abstract DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException;
|
||||
|
||||
@Override
|
||||
|
|
|
@ -25,16 +25,29 @@ import org.apache.lucene.index.FieldInfo;
|
|||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
|
||||
// nocommit add javadocs stating that this must open all
|
||||
// necessary files "on init", not later eg in .getXXX, else
|
||||
// an IW that deletes a commit will cause an SR to hit
|
||||
// exceptions....
|
||||
|
||||
/** Abstract API that produces numeric, binary and
|
||||
* sorted docvalues.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class DocValuesProducer implements Closeable {
|
||||
|
||||
/** Sole constructor. (For invocation by subclass
|
||||
* constructors, typically implicit.) */
|
||||
protected DocValuesProducer() {}
|
||||
|
||||
/** Returns {@link NumericDocValues} for this field.
|
||||
* The returned instance need not be thread-safe: it will only be
|
||||
* used by a single thread. */
|
||||
public abstract NumericDocValues getNumeric(FieldInfo field) throws IOException;
|
||||
|
||||
/** Returns {@link BinaryDocValues} for this field.
|
||||
* The returned instance need not be thread-safe: it will only be
|
||||
* used by a single thread. */
|
||||
public abstract BinaryDocValues getBinary(FieldInfo field) throws IOException;
|
||||
|
||||
/** Returns {@link SortedDocValues} for this field.
|
||||
* The returned instance need not be thread-safe: it will only be
|
||||
* used by a single thread. */
|
||||
public abstract SortedDocValues getSorted(FieldInfo field) throws IOException;
|
||||
}
|
||||
|
|
|
@ -35,7 +35,15 @@ public abstract class NormsFormat {
|
|||
* index. */
|
||||
public abstract DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException;
|
||||
|
||||
/** Returns a {@link DocValuesProducer} to read norms from the
|
||||
* index. */
|
||||
/**
|
||||
* Returns a {@link DocValuesProducer} to read norms from the index.
|
||||
* <p>
|
||||
* NOTE: by the time this call returns, it must hold open any files it will
|
||||
* need to use; else, those files may be deleted. Additionally, required files
|
||||
* may be deleted during the execution of this call before there is a chance
|
||||
* to open them. Under these circumstances an IOException should be thrown by
|
||||
* the implementation. IOExceptions are expected and will automatically cause
|
||||
* a retry of the segment opening logic with the newly revised segments.
|
||||
*/
|
||||
public abstract DocValuesProducer normsProducer(SegmentReadState state) throws IOException;
|
||||
}
|
||||
|
|
|
@ -1,8 +1,5 @@
|
|||
package org.apache.lucene.document;
|
||||
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -20,14 +17,45 @@ import org.apache.lucene.util.BytesRef;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* Field that stores a per-document {@link BytesRef} value.
|
||||
* <p>
|
||||
* The values are stored directly with no sharing, which is a good fit when
|
||||
* the fields don't share (many) values, such as a title field. If values
|
||||
* may be shared and sorted it's better to use {@link SortedDocValuesField}.
|
||||
* Here's an example usage:
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* document.add(new BinaryDocValuesField(name, new BytesRef("hello")));
|
||||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
* If you also need to store the value, you should add a
|
||||
* separate {@link StoredField} instance.
|
||||
*
|
||||
* @see BinaryDocValues
|
||||
* */
|
||||
public class BinaryDocValuesField extends StoredField {
|
||||
|
||||
/**
|
||||
* Type for straight bytes DocValues.
|
||||
*/
|
||||
public static final FieldType TYPE = new FieldType();
|
||||
static {
|
||||
TYPE.setDocValueType(FieldInfo.DocValuesType.BINARY);
|
||||
TYPE.freeze();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new binary DocValues field.
|
||||
* @param name field name
|
||||
* @param value binary content
|
||||
* @throws IllegalArgumentException if the field name is null
|
||||
*/
|
||||
public BinaryDocValuesField(String name, BytesRef value) {
|
||||
super(name, TYPE);
|
||||
fieldsData = value;
|
||||
|
|
|
@ -1,8 +1,5 @@
|
|||
package org.apache.lucene.document;
|
||||
|
||||
import org.apache.lucene.index.AtomicReader; // javadocs
|
||||
import org.apache.lucene.search.FieldCache; // javadocs
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -20,6 +17,9 @@ import org.apache.lucene.search.FieldCache; // javadocs
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.AtomicReader; // javadocs
|
||||
import org.apache.lucene.search.FieldCache; // javadocs
|
||||
|
||||
/**
|
||||
* Syntactic sugar for encoding doubles as NumericDocValues
|
||||
* via {@link Double#doubleToRawLongBits(double)}.
|
||||
|
@ -33,6 +33,12 @@ import org.apache.lucene.search.FieldCache; // javadocs
|
|||
*/
|
||||
public class DoubleDocValuesField extends NumericDocValuesField {
|
||||
|
||||
/**
|
||||
* Creates a new DocValues field with the specified 64-bit double value
|
||||
* @param name field name
|
||||
* @param value 64-bit double value
|
||||
* @throws IllegalArgumentException if the field name is null
|
||||
*/
|
||||
public DoubleDocValuesField(String name, double value) {
|
||||
super(name, Double.doubleToRawLongBits(value));
|
||||
}
|
||||
|
|
|
@ -1,8 +1,5 @@
|
|||
package org.apache.lucene.document;
|
||||
|
||||
import org.apache.lucene.index.AtomicReader; // javadocs
|
||||
import org.apache.lucene.search.FieldCache; // javadocs
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -20,6 +17,9 @@ import org.apache.lucene.search.FieldCache; // javadocs
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.AtomicReader; // javadocs
|
||||
import org.apache.lucene.search.FieldCache; // javadocs
|
||||
|
||||
/**
|
||||
* Syntactic sugar for encoding floats as NumericDocValues
|
||||
* via {@link Float#floatToRawIntBits(float)}.
|
||||
|
@ -33,6 +33,12 @@ import org.apache.lucene.search.FieldCache; // javadocs
|
|||
*/
|
||||
public class FloatDocValuesField extends NumericDocValuesField {
|
||||
|
||||
/**
|
||||
* Creates a new DocValues field with the specified 32-bit float value
|
||||
* @param name field name
|
||||
* @param value 32-bit float value
|
||||
* @throws IllegalArgumentException if the field name is null
|
||||
*/
|
||||
public FloatDocValuesField(String name, float value) {
|
||||
super(name, Float.floatToRawIntBits(value));
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.index.FieldInfo;
|
|||
* sorting or value retrieval. Here's an example usage:
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* document.add(new LongDocValuesField(name, 22L));
|
||||
* document.add(new NumericDocValuesField(name, 22L));
|
||||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
* sorting. Here's an example usage:
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* document.add(new SortedBytesDocValuesField(name, new BytesRef("hello")));
|
||||
* document.add(new SortedDocValuesField(name, new BytesRef("hello")));
|
||||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
|
|
|
@ -23,6 +23,10 @@ import org.apache.lucene.util.BytesRef;
|
|||
* A per-document byte[]
|
||||
*/
|
||||
public abstract class BinaryDocValues {
|
||||
|
||||
/** Sole constructor. (For invocation by subclass
|
||||
* constructors, typically implicit.) */
|
||||
protected BinaryDocValues() {}
|
||||
|
||||
/** Lookup the value for document.
|
||||
*
|
||||
|
|
|
@ -1257,6 +1257,10 @@ public class CheckIndex {
|
|||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* Test docvalues.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public static Status.DocValuesStatus testDocValues(AtomicReader reader,
|
||||
PrintStream infoStream) {
|
||||
final Status.DocValuesStatus status = new Status.DocValuesStatus();
|
||||
|
|
|
@ -48,6 +48,9 @@ import org.apache.lucene.util.Version;
|
|||
// nocommit move this back to test-framework!!!
|
||||
public class MultiDocValues {
|
||||
|
||||
/** No instantiation */
|
||||
private MultiDocValues() {}
|
||||
|
||||
/** returns a NumericDocValues for a reader's norms (potentially merging on-the-fly) */
|
||||
// moved to src/java so SlowWrapper can use it... uggggggh
|
||||
public static NumericDocValues getNormValues(final IndexReader r, final String field) throws IOException {
|
||||
|
|
|
@ -21,6 +21,11 @@ package org.apache.lucene.index;
|
|||
* A per-document numeric value.
|
||||
*/
|
||||
public abstract class NumericDocValues {
|
||||
|
||||
/** Sole constructor. (For invocation by subclass
|
||||
* constructors, typically implicit.) */
|
||||
protected NumericDocValues() {}
|
||||
|
||||
/**
|
||||
* Returns the numeric value for the specified document ID.
|
||||
* @param docID document ID to lookup
|
||||
|
|
|
@ -28,6 +28,11 @@ import org.apache.lucene.util.BytesRef;
|
|||
* are dense and in increasing sorted order.
|
||||
*/
|
||||
public abstract class SortedDocValues extends BinaryDocValues {
|
||||
|
||||
/** Sole constructor. (For invocation by subclass
|
||||
* constructors, typically implicit.) */
|
||||
protected SortedDocValues() {}
|
||||
|
||||
/**
|
||||
* Returns the ordinal for the specified docID.
|
||||
* @param docID document ID to lookup
|
||||
|
|
|
@ -31,6 +31,7 @@ public class SortedDocValuesTermsEnum extends TermsEnum {
|
|||
private int currentOrd = -1;
|
||||
private final BytesRef term = new BytesRef();
|
||||
|
||||
/** Creates a new TermsEnum over the provided values */
|
||||
public SortedDocValuesTermsEnum(SortedDocValues values) {
|
||||
this.values = values;
|
||||
}
|
||||
|
|
|
@ -46,27 +46,39 @@ import org.apache.lucene.util.RamUsageEstimator;
|
|||
*/
|
||||
public interface FieldCache {
|
||||
|
||||
/** Field values as 8-bit signed bytes */
|
||||
public static abstract class Bytes {
|
||||
/** Return a single Byte representation of this field's value. */
|
||||
public abstract byte get(int docID);
|
||||
}
|
||||
|
||||
/** Field values as 16-bit signed shorts */
|
||||
public static abstract class Shorts {
|
||||
/** Return a short representation of this field's value. */
|
||||
public abstract short get(int docID);
|
||||
}
|
||||
|
||||
/** Field values as 32-bit signed integers */
|
||||
public static abstract class Ints {
|
||||
/** Return an integer representation of this field's value. */
|
||||
public abstract int get(int docID);
|
||||
}
|
||||
|
||||
/** Field values as 32-bit signed long integers */
|
||||
public static abstract class Longs {
|
||||
/** Return an long representation of this field's value. */
|
||||
public abstract long get(int docID);
|
||||
}
|
||||
|
||||
/** Field values as 32-bit floats */
|
||||
public static abstract class Floats {
|
||||
/** Return an float representation of this field's value. */
|
||||
public abstract float get(int docID);
|
||||
}
|
||||
|
||||
/** Field values as 64-bit doubles */
|
||||
public static abstract class Doubles {
|
||||
/** Return an double representation of this field's value. */
|
||||
public abstract double get(int docID);
|
||||
}
|
||||
|
||||
|
@ -138,7 +150,7 @@ public interface FieldCache {
|
|||
* @see FieldCache#getDoubles(AtomicReader, String, FieldCache.DoubleParser, boolean)
|
||||
*/
|
||||
public interface DoubleParser extends Parser {
|
||||
/** Return an long representation of this field's value. */
|
||||
/** Return an double representation of this field's value. */
|
||||
public double parseDouble(BytesRef term);
|
||||
}
|
||||
|
||||
|
|
|
@ -1184,6 +1184,7 @@ public class PackedInts {
|
|||
return new Header(format, valueCount, bitsPerValue, version);
|
||||
}
|
||||
|
||||
/** Header identifying the structure of a packed integer array. */
|
||||
public static class Header {
|
||||
|
||||
private final Format format;
|
||||
|
|
|
@ -80,8 +80,8 @@ field fall into a single group.</p>
|
|||
<p>Known limitations:</p>
|
||||
<ul>
|
||||
<li> For the two-pass grouping search, the group field must be a
|
||||
single-valued indexed field.
|
||||
{@link org.apache.lucene.search.FieldCache} is used to load the {@link org.apache.lucene.search.FieldCache.DocTermsIndex} for this field.
|
||||
single-valued indexed field (or indexed as a {@link org.apache.lucene.document.SortedDocValuesField}).
|
||||
{@link org.apache.lucene.search.FieldCache} is used to load the {@link org.apache.lucene.index.SortedDocValues} for this field.
|
||||
<li> Although Solr support grouping by function and this module has abstraction of what a group is, there are currently only
|
||||
implementations for grouping based on terms.
|
||||
<li> Sharding is not directly supported, though is not too
|
||||
|
@ -196,25 +196,5 @@ fields, <code>FieldCache</code>, etc.).
|
|||
<code>GroupingSearch</code> convenience utility
|
||||
</p>
|
||||
|
||||
<p>
|
||||
There are also DocValues based implementations available for the group collectors. There are factory methods
|
||||
available for creating dv based instances. A typical example using dv based grouping with the
|
||||
<code>GroupingSearch</code> convenience utility:
|
||||
</p>
|
||||
|
||||
<pre class="prettyprint">
|
||||
boolean diskResident = true; // Whether values should fetched directly from disk by passing the Java heap space.
|
||||
DocValues.Type docValuesType = DocValues.Type.BYTES_VAR_SORTED;
|
||||
GroupingSearch groupingSearch = new GroupingSearch("author", docValuesType, diskResident);
|
||||
groupingSearch.setGroupSort(groupSort);
|
||||
groupingSearch.setFillSortFields(fillFields);
|
||||
|
||||
TermQuery query = new TermQuery(new Term("content", searchTerm));
|
||||
// The docValuesType variable decides the generic type. When float is used this Double and in case of int this is Long
|
||||
TopGroups<BytesRef> result = groupingSearch.search(indexSearcher, query, groupOffset, groupLimit);
|
||||
|
||||
// Render groupsResult...
|
||||
</pre>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.lucene.codecs.DocValuesConsumer;
|
|||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
|
||||
/** Read-write version of {@link Lucene40DocValuesFormat} for testing */
|
||||
public class Lucene40RWDocValuesFormat extends Lucene40DocValuesFormat {
|
||||
|
||||
@Override
|
||||
|
|
|
@ -1,11 +1,5 @@
|
|||
package org.apache.lucene.codecs.lucene40;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -23,6 +17,13 @@ import org.apache.lucene.index.SegmentWriteState;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
|
||||
/** Read-write version of {@link Lucene40NormsFormat} for testing */
|
||||
public class Lucene40RWNormsFormat extends Lucene40NormsFormat {
|
||||
|
||||
@Override
|
||||
|
|
Loading…
Reference in New Issue