Mirror of https://github.com/apache/lucene.git

LUCENE-3433: Random access non RAM resident IndexDocValues (CSF)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1179970 13f79535-47bb-0310-9956-ffa450edef68

Parent: 70ee6dbdb6
Commit: 63b736c033
@@ -548,6 +548,10 @@ New features

* LUCENE-2309: Added IndexableField.tokenStream(Analyzer) which is now
  responsible for creating the TokenStreams for Fields when they are to
  be indexed. (Chris Male)

* LUCENE-3433: Added random access for non RAM resident IndexDocValues. RAM
  resident and disk resident IndexDocValues are now exposed via the Source
  interface. ValuesEnum has been removed in favour of Source. (Simon Willnauer)

Optimizations

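The entry above replaces the cursor-style ValuesEnum with a random-access Source that can be either RAM resident or read straight from disk. As a rough, self-contained illustration of that distinction (plain Java; LongSource and the other names here are made up and are not Lucene classes), the same get-by-docID contract can be served from an in-memory array or by seeking a file on every call:

import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.file.Files;
import java.nio.file.Path;

// Illustrative only: a random-access "source" of one long per document.
interface LongSource {
  long get(int docID) throws IOException;
}

// RAM resident: the whole column is loaded into a long[].
final class RamLongSource implements LongSource {
  private final long[] values;
  RamLongSource(long[] values) { this.values = values; }
  public long get(int docID) { return values[docID]; }
}

// Disk resident: every access seeks to docID * 8 and reads 8 bytes.
final class DiskLongSource implements LongSource, AutoCloseable {
  private final RandomAccessFile file;
  DiskLongSource(Path path) throws IOException {
    this.file = new RandomAccessFile(path.toFile(), "r");
  }
  public long get(int docID) throws IOException {
    file.seek(8L * docID);      // random access: no forward-only cursor needed
    return file.readLong();
  }
  public void close() throws IOException { file.close(); }
}

public class SourceDemo {
  public static void main(String[] args) throws IOException {
    Path path = Files.createTempFile("values", ".bin");
    try (RandomAccessFile out = new RandomAccessFile(path.toFile(), "rw")) {
      for (long v = 0; v < 10; v++) out.writeLong(v * 7);
    }
    try (DiskLongSource disk = new DiskLongSource(path)) {
      System.out.println(disk.get(3));   // 21, fetched straight from disk
    }
    System.out.println(new RamLongSource(new long[] {0, 7, 14, 21}).get(3)); // 21
  }
}
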
@@ -19,7 +19,6 @@ package org.apache.lucene.document;
import java.io.Reader;
import java.util.Comparator;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.values.PerDocFieldValues;
import org.apache.lucene.index.values.ValueType;

@@ -317,21 +316,34 @@ public class IndexDocValuesField extends Field implements PerDocFieldValues {
    final String value;
    switch (type) {
    case BYTES_FIXED_DEREF:
    case BYTES_FIXED_SORTED:
    case BYTES_FIXED_STRAIGHT:
    case BYTES_VAR_DEREF:
    case BYTES_VAR_SORTED:
    case BYTES_VAR_STRAIGHT:
      value = "bytes:bytes.utf8ToString();";
    case BYTES_FIXED_SORTED:
    case BYTES_VAR_SORTED:
      // don't use to unicode string this is not necessarily unicode here
      value = "bytes: " + bytes.toString();
      break;
    case FIXED_INTS_16:
      value = "int16: " + longValue;
      break;
    case FIXED_INTS_32:
      value = "int32: " + longValue;
      break;
    case FIXED_INTS_64:
      value = "int64: " + longValue;
      break;
    case FIXED_INTS_8:
      value = "int8: " + longValue;
      break;
    case VAR_INTS:
      value = "int:" + longValue;
      value = "vint: " + longValue;
      break;
    case FLOAT_32:
      value = "float32:" + doubleValue;
      value = "float32: " + doubleValue;
      break;
    case FLOAT_64:
      value = "float64:" + doubleValue;
      value = "float64: " + doubleValue;
      break;
    default:
      throw new IllegalArgumentException("unknown type: " + type);

@@ -353,14 +365,18 @@ public class IndexDocValuesField extends Field implements PerDocFieldValues {
    final IndexDocValuesField valField = new IndexDocValuesField(field.name(), field.fieldType(), field.stringValue());
    switch (type) {
    case BYTES_FIXED_DEREF:
    case BYTES_FIXED_SORTED:
    case BYTES_FIXED_STRAIGHT:
    case BYTES_VAR_DEREF:
    case BYTES_VAR_SORTED:
    case BYTES_VAR_STRAIGHT:
    case BYTES_FIXED_SORTED:
    case BYTES_VAR_SORTED:
      BytesRef ref = field.isBinary() ? field.binaryValue() : new BytesRef(field.stringValue());
      valField.setBytes(ref, type);
      break;
    case FIXED_INTS_16:
    case FIXED_INTS_32:
    case FIXED_INTS_64:
    case FIXED_INTS_8:
    case VAR_INTS:
      valField.setInt(Long.parseLong(field.stringValue()));
      break;

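The hunk above rebuilds a typed doc values field from its stored form: the bytes variants take the binary value (or the UTF-8 bytes of the stored string) via setBytes, while every integer variant is parsed back with Long.parseLong. A standalone sketch of that dispatch, with hypothetical names (Kind, TypedValue) standing in for Lucene's ValueType and field classes:

import java.nio.charset.StandardCharsets;

enum Kind { BYTES_FIXED, BYTES_VAR, INTS_FIXED, INTS_VAR }

final class TypedValue {
  final byte[] bytes;   // set for bytes kinds
  final Long number;    // set for integer kinds
  private TypedValue(byte[] bytes, Long number) { this.bytes = bytes; this.number = number; }

  // Rebuild the typed value from how it was stored: raw binary if present,
  // otherwise the stored string (UTF-8 for bytes kinds, parsed for int kinds).
  static TypedValue fromStored(Kind kind, byte[] binary, String stored) {
    switch (kind) {
      case BYTES_FIXED:
      case BYTES_VAR:
        byte[] b = binary != null ? binary : stored.getBytes(StandardCharsets.UTF_8);
        return new TypedValue(b, null);
      case INTS_FIXED:
      case INTS_VAR:
        return new TypedValue(null, Long.parseLong(stored));
      default:
        throw new IllegalArgumentException("unknown kind: " + kind);
    }
  }
}
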
@@ -41,7 +41,7 @@ import java.util.Map;
import org.apache.lucene.index.codecs.BlockTreeTermsReader;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.index.values.ValuesEnum;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

@@ -1070,27 +1070,28 @@ public class CheckIndex {
      if (docValues == null) {
        continue;
      }
      final ValuesEnum values = docValues.getEnum();
      while (values.nextDoc() != ValuesEnum.NO_MORE_DOCS) {
      final Source values = docValues.getDirectSource();
      final int maxDoc = reader.maxDoc();
      for (int i = 0; i < maxDoc; i++) {
        switch (fieldInfo.docValues) {
        case BYTES_FIXED_DEREF:
        case BYTES_FIXED_SORTED:
        case BYTES_VAR_SORTED:
        case BYTES_FIXED_DEREF:
        case BYTES_FIXED_STRAIGHT:
        case BYTES_VAR_DEREF:
        case BYTES_VAR_SORTED:
        case BYTES_VAR_STRAIGHT:
          values.bytes();
          values.getBytes(i, new BytesRef());
          break;
        case FLOAT_32:
        case FLOAT_64:
          values.getFloat();
          values.getFloat(i);
          break;
        case VAR_INTS:
        case FIXED_INTS_16:
        case FIXED_INTS_32:
        case FIXED_INTS_64:
        case FIXED_INTS_8:
          values.getInt();
          values.getInt(i);
          break;
        default:
          throw new IllegalArgumentException("Field: " + fieldInfo.name

@@ -652,31 +652,30 @@ public final class FieldInfos implements Iterable<FieldInfo> {
        case BYTES_FIXED_DEREF:
          b = 5;
          break;
        case BYTES_FIXED_SORTED:
        case BYTES_VAR_STRAIGHT:
          b = 6;
          break;
        case BYTES_VAR_STRAIGHT:
        case BYTES_VAR_DEREF:
          b = 7;
          break;
        case BYTES_VAR_DEREF:
        case FIXED_INTS_16:
          b = 8;
          break;
        case BYTES_VAR_SORTED:
        case FIXED_INTS_32:
          b = 9;
          break;
        case FIXED_INTS_16:
        case FIXED_INTS_64:
          b = 10;
          break;
        case FIXED_INTS_32:
        case FIXED_INTS_8:
          b = 11;
          break;
        case FIXED_INTS_64:
        case BYTES_FIXED_SORTED:
          b = 12;
          break;
        case FIXED_INTS_8:
        case BYTES_VAR_SORTED:
          b = 13;
          break;

        default:
          throw new IllegalStateException("unhandled indexValues type " + fi.docValues);
        }

@@ -754,29 +753,29 @@ public final class FieldInfos implements Iterable<FieldInfo> {
          docValuesType = ValueType.BYTES_FIXED_DEREF;
          break;
        case 6:
          docValuesType = ValueType.BYTES_FIXED_SORTED;
          break;
        case 7:
          docValuesType = ValueType.BYTES_VAR_STRAIGHT;
          break;
        case 8:
        case 7:
          docValuesType = ValueType.BYTES_VAR_DEREF;
          break;
        case 9:
          docValuesType = ValueType.BYTES_VAR_SORTED;
          break;
        case 10:
        case 8:
          docValuesType = ValueType.FIXED_INTS_16;
          break;
        case 11:
        case 9:
          docValuesType = ValueType.FIXED_INTS_32;
          break;
        case 12:
        case 10:
          docValuesType = ValueType.FIXED_INTS_64;
          break;
        case 13:
        case 11:
          docValuesType = ValueType.FIXED_INTS_8;
          break;
          break;
        case 12:
          docValuesType = ValueType.BYTES_FIXED_SORTED;
          break;
        case 13:
          docValuesType = ValueType.BYTES_VAR_SORTED;
          break;

        default:
          throw new IllegalStateException("unhandled indexValues type " + b);

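Because removed and added lines are interleaved without +/- markers, the two FieldInfos hunks above are hard to read; reconstructed from them, the new on-disk codes appear to be 5=BYTES_FIXED_DEREF, 6=BYTES_VAR_STRAIGHT, 7=BYTES_VAR_DEREF, 8=FIXED_INTS_16, 9=FIXED_INTS_32, 10=FIXED_INTS_64, 11=FIXED_INTS_8, 12=BYTES_FIXED_SORTED, 13=BYTES_VAR_SORTED (types not shown in the hunks keep their existing codes). A standalone sketch of keeping such a write/read mapping in one place (illustrative enum, not Lucene's ValueType):

// Illustrative only; codes follow the new ordering as it appears in the patch.
enum DocValuesKind {
  BYTES_FIXED_DEREF, BYTES_VAR_STRAIGHT, BYTES_VAR_DEREF,
  FIXED_INTS_16, FIXED_INTS_32, FIXED_INTS_64, FIXED_INTS_8,
  BYTES_FIXED_SORTED, BYTES_VAR_SORTED
}

final class DocValuesCodes {
  // Keep write and read together so the two switches cannot drift apart.
  static byte toByte(DocValuesKind kind) {
    switch (kind) {
      case BYTES_FIXED_DEREF:  return 5;
      case BYTES_VAR_STRAIGHT: return 6;
      case BYTES_VAR_DEREF:    return 7;
      case FIXED_INTS_16:      return 8;
      case FIXED_INTS_32:      return 9;
      case FIXED_INTS_64:      return 10;
      case FIXED_INTS_8:       return 11;
      case BYTES_FIXED_SORTED: return 12;
      case BYTES_VAR_SORTED:   return 13;
      default: throw new IllegalStateException("unhandled kind " + kind);
    }
  }

  static DocValuesKind fromByte(byte b) {
    for (DocValuesKind kind : DocValuesKind.values()) {
      if (toByte(kind) == b) {
        return kind;
      }
    }
    throw new IllegalStateException("unhandled code " + b);
  }
}
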
@@ -58,11 +58,11 @@ public abstract class DocValuesReaderBase extends PerDocValues {
  public Collection<String> fields() {
    return docValues().keySet();
  }

  public Comparator<BytesRef> getComparator() throws IOException {
    return BytesRef.getUTF8SortedAsUnicodeComparator();
  }

  // Only opens files... doesn't actually load any values
  protected TreeMap<String, IndexDocValues> load(FieldInfos fieldInfos,
      String segment, int docCount, Directory dir, int codecId, IOContext context)

@@ -121,9 +121,9 @@ public abstract class DocValuesReaderBase extends PerDocValues {
    case VAR_INTS:
      return Ints.getValues(dir, id, docCount, type, context);
    case FLOAT_32:
      return Floats.getValues(dir, id, docCount, context);
      return Floats.getValues(dir, id, docCount, context, type);
    case FLOAT_64:
      return Floats.getValues(dir, id, docCount, context);
      return Floats.getValues(dir, id, docCount, context, type);
    case BYTES_FIXED_STRAIGHT:
      return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount, getComparator(), context);
    case BYTES_FIXED_DEREF:

@@ -54,7 +54,7 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
  @Override
  public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
    return Writer.create(field.getDocValues(),
        docValuesId(segmentName, codecId, field.number),
        docValuesId(segmentName, codecId, field.number),
        getDirectory(), getComparator(), bytesUsed, context);
  }

@@ -62,6 +62,7 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
    return segmentsName + "_" + codecID + "-" + fieldId;
  }

  public Comparator<BytesRef> getComparator() throws IOException {
    return BytesRef.getUTF8SortedAsUnicodeComparator();
  }

@@ -56,9 +56,9 @@ public class SepDocValuesConsumer extends DocValuesWriterBase {
    switch (fieldInfo.getDocValues()) {
    case BYTES_FIXED_DEREF:
    case BYTES_VAR_DEREF:
    case BYTES_VAR_SORTED:
    case BYTES_FIXED_SORTED:
    case BYTES_VAR_STRAIGHT:
    case BYTES_FIXED_SORTED:
    case BYTES_VAR_SORTED:
      files.add(IndexFileNames.segmentFileName(filename, "",
          Writer.INDEX_EXTENSION));
      assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",

@@ -77,7 +77,6 @@ public class SepDocValuesConsumer extends DocValuesWriterBase {
      assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
          Writer.DATA_EXTENSION));
      break;

    default:
      assert false;
    }

@ -26,14 +26,12 @@ import java.util.concurrent.atomic.AtomicLong;
|
|||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.values.IndexDocValues.SortedSource;
|
||||
import org.apache.lucene.index.values.IndexDocValues.Source;
|
||||
import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.ByteBlockPool;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
|
@ -50,8 +48,7 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
/**
|
||||
* Provides concrete Writer/Reader implementations for <tt>byte[]</tt> value per
|
||||
* document. There are 6 package-private default implementations of this, for
|
||||
* all combinations of {@link Mode#DEREF}/{@link Mode#STRAIGHT}/
|
||||
* {@link Mode#SORTED} x fixed-length/variable-length.
|
||||
* all combinations of {@link Mode#DEREF}/{@link Mode#STRAIGHT} x fixed-length/variable-length.
|
||||
*
|
||||
* <p>
|
||||
* NOTE: Currently the total amount of byte[] data stored (across a single
|
||||
|
@ -101,11 +98,12 @@ public final class Bytes {
|
|||
* the segment name and a unique id per segment.
|
||||
* @param mode
|
||||
* the writers store mode
|
||||
* @param comp
|
||||
* a {@link BytesRef} comparator - only used with {@link Mode#SORTED}
|
||||
* @param fixedSize
|
||||
* <code>true</code> if all bytes subsequently passed to the
|
||||
* {@link Writer} will have the same length
|
||||
* @param sortComparator {@link BytesRef} comparator used by sorted variants.
|
||||
* If <code>null</code> {@link BytesRef#getUTF8SortedAsUnicodeComparator()}
|
||||
* is used instead
|
||||
* @param bytesUsed
|
||||
* an {@link AtomicLong} instance to track the used bytes within the
|
||||
* {@link Writer}. A call to {@link Writer#finish(int)} will release
|
||||
|
@@ -117,12 +115,12 @@ public final class Bytes {
   *           if the files for the writer can not be created.
   */
  public static Writer getWriter(Directory dir, String id, Mode mode,
      Comparator<BytesRef> comp, boolean fixedSize, Counter bytesUsed, IOContext context)
      boolean fixedSize, Comparator<BytesRef> sortComparator, Counter bytesUsed, IOContext context)
      throws IOException {
    // TODO -- i shouldn't have to specify fixed? can
    // track itself & do the write thing at write time?
    if (comp == null) {
      comp = BytesRef.getUTF8SortedAsUnicodeComparator();
    if (sortComparator == null) {
      sortComparator = BytesRef.getUTF8SortedAsUnicodeComparator();
    }

    if (fixedSize) {

@@ -131,7 +129,7 @@ public final class Bytes {
      } else if (mode == Mode.DEREF) {
        return new FixedDerefBytesImpl.Writer(dir, id, bytesUsed, context);
      } else if (mode == Mode.SORTED) {
        return new FixedSortedBytesImpl.Writer(dir, id, comp, bytesUsed, context);
        return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context);
      }
    } else {
      if (mode == Mode.STRAIGHT) {

@@ -139,7 +137,7 @@ public final class Bytes {
      } else if (mode == Mode.DEREF) {
        return new VarDerefBytesImpl.Writer(dir, id, bytesUsed, context);
      } else if (mode == Mode.SORTED) {
        return new VarSortedBytesImpl.Writer(dir, id, comp, bytesUsed, context);
        return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context);
      }
    }

@@ -163,30 +161,34 @@ public final class Bytes {
   *          otherwise <code>false</code>
   * @param maxDoc
   *          the number of document values stored for the given ID
   * @param sortComparator byte comparator used by sorted variants
   * @param sortComparator {@link BytesRef} comparator used by sorted variants.
   *          If <code>null</code> {@link BytesRef#getUTF8SortedAsUnicodeComparator()}
   *          is used instead
   * @return an initialized {@link IndexDocValues} instance.
   * @throws IOException
   *           if an {@link IOException} occurs
   */
  public static IndexDocValues getValues(Directory dir, String id, Mode mode,
      boolean fixedSize, int maxDoc, Comparator<BytesRef> sortComparator, IOContext context) throws IOException {

    if (sortComparator == null) {
      sortComparator = BytesRef.getUTF8SortedAsUnicodeComparator();
    }
    // TODO -- I can peek @ header to determing fixed/mode?
    if (fixedSize) {
      if (mode == Mode.STRAIGHT) {
        return new FixedStraightBytesImpl.Reader(dir, id, maxDoc, context);
        return new FixedStraightBytesImpl.FixedStraightReader(dir, id, maxDoc, context);
      } else if (mode == Mode.DEREF) {
        return new FixedDerefBytesImpl.Reader(dir, id, maxDoc, context);
        return new FixedDerefBytesImpl.FixedDerefReader(dir, id, maxDoc, context);
      } else if (mode == Mode.SORTED) {
        return new FixedSortedBytesImpl.Reader(dir, id, maxDoc, context);
        return new FixedSortedBytesImpl.Reader(dir, id, maxDoc, context, ValueType.BYTES_FIXED_SORTED, sortComparator);
      }
    } else {
      if (mode == Mode.STRAIGHT) {
        return new VarStraightBytesImpl.Reader(dir, id, maxDoc, context);
        return new VarStraightBytesImpl.VarStraightReader(dir, id, maxDoc, context);
      } else if (mode == Mode.DEREF) {
        return new VarDerefBytesImpl.Reader(dir, id, maxDoc, context);
        return new VarDerefBytesImpl.VarDerefReader(dir, id, maxDoc, context);
      } else if (mode == Mode.SORTED) {
        return new VarSortedBytesImpl.Reader(dir, id, maxDoc, sortComparator, context);
        return new VarSortedBytesImpl.Reader(dir, id, maxDoc,context, ValueType.BYTES_VAR_SORTED, sortComparator);
      }
    }

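Bytes.getWriter and Bytes.getValues above pick one of six concrete implementations from the combination of fixed/variable length and STRAIGHT/DEREF/SORTED mode, defaulting the sort comparator to the UTF-8-as-Unicode order when the caller passes null. A standalone sketch of that dispatch shape (illustrative names only, not the Lucene classes):

import java.util.Comparator;

// Illustrative stand-ins for Lucene's Bytes.Mode and its six reader/writer variants.
enum Mode { STRAIGHT, DEREF, SORTED }

public final class BytesVariants {
  // Mirrors the shape of Bytes.getWriter/getValues: choose one of six variants
  // from (fixedSize, mode), defaulting the sort comparator when the caller passes null.
  static String pick(boolean fixedSize, Mode mode, Comparator<String> sortComparator) {
    if (sortComparator == null) {
      sortComparator = Comparator.naturalOrder(); // stand-in for the UTF-8-as-Unicode default
    }
    String length = fixedSize ? "Fixed" : "Var";
    switch (mode) {
      case STRAIGHT: return length + "Straight";
      case DEREF:    return length + "Deref";
      case SORTED:   return length + "Sorted"; // only sorted variants actually use the comparator
      default:       throw new IllegalArgumentException("unknown mode: " + mode);
    }
  }

  public static void main(String[] args) {
    System.out.println(pick(true, Mode.SORTED, null));  // FixedSorted
    System.out.println(pick(false, Mode.DEREF, null));  // VarDeref
  }
}
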
@ -196,7 +198,6 @@ public final class Bytes {
|
|||
// TODO open up this API?
|
||||
static abstract class BytesSourceBase extends Source {
|
||||
private final PagedBytes pagedBytes;
|
||||
private final ValueType type;
|
||||
protected final IndexInput datIn;
|
||||
protected final IndexInput idxIn;
|
||||
protected final static int PAGED_BYTES_BITS = 15;
|
||||
|
@ -206,6 +207,7 @@ public final class Bytes {
|
|||
|
||||
protected BytesSourceBase(IndexInput datIn, IndexInput idxIn,
|
||||
PagedBytes pagedBytes, long bytesToRead, ValueType type) throws IOException {
|
||||
super(type);
|
||||
assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: "
|
||||
+ (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
|
||||
this.datIn = datIn;
|
||||
|
@ -214,192 +216,15 @@ public final class Bytes {
|
|||
this.pagedBytes.copy(datIn, bytesToRead);
|
||||
data = pagedBytes.freeze(true);
|
||||
this.idxIn = idxIn;
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
try {
|
||||
data.close(); // close data
|
||||
} finally {
|
||||
try {
|
||||
if (datIn != null) {
|
||||
datIn.close();
|
||||
}
|
||||
} finally {
|
||||
if (idxIn != null) {// if straight - no index needed
|
||||
idxIn.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return type;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns one greater than the largest possible document number.
|
||||
*/
|
||||
protected abstract int maxDoc();
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
|
||||
return new SourceEnum(attrSource, type(), this, maxDoc()) {
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= numDocs) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
while (source.getBytes(target, bytesRef).length == 0) {
|
||||
if (++target >= numDocs) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
return pos = target;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static abstract class DerefBytesSourceBase extends BytesSourceBase {
|
||||
protected final PackedInts.Reader addresses;
|
||||
public DerefBytesSourceBase(IndexInput datIn, IndexInput idxIn, long bytesToRead, ValueType type) throws IOException {
|
||||
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), bytesToRead, type);
|
||||
addresses = PackedInts.getReader(idxIn);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
return addresses.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int maxDoc() {
|
||||
return addresses.size();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static abstract class BytesSortedSourceBase extends SortedSource {
|
||||
private final PagedBytes pagedBytes;
|
||||
private final Comparator<BytesRef> comp;
|
||||
protected final PackedInts.Reader docToOrdIndex;
|
||||
private final ValueType type;
|
||||
|
||||
protected final IndexInput datIn;
|
||||
protected final IndexInput idxIn;
|
||||
protected final BytesRef defaultValue = new BytesRef();
|
||||
protected final static int PAGED_BYTES_BITS = 15;
|
||||
protected final PagedBytes.Reader data;
|
||||
|
||||
|
||||
protected BytesSortedSourceBase(IndexInput datIn, IndexInput idxIn,
|
||||
Comparator<BytesRef> comp, long bytesToRead, ValueType type) throws IOException {
|
||||
this(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), bytesToRead, type);
|
||||
}
|
||||
|
||||
protected BytesSortedSourceBase(IndexInput datIn, IndexInput idxIn,
|
||||
Comparator<BytesRef> comp, PagedBytes pagedBytes, long bytesToRead,ValueType type)
|
||||
throws IOException {
|
||||
assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: "
|
||||
+ (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
|
||||
this.datIn = datIn;
|
||||
this.pagedBytes = pagedBytes;
|
||||
this.pagedBytes.copy(datIn, bytesToRead);
|
||||
data = pagedBytes.freeze(true);
|
||||
this.idxIn = idxIn;
|
||||
this.comp = comp == null ? BytesRef.getUTF8SortedAsUnicodeComparator()
|
||||
: comp;
|
||||
docToOrdIndex = PackedInts.getReader(idxIn);
|
||||
this.type = type;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public int ord(int docID) {
|
||||
return (int) docToOrdIndex.get(docID) -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
|
||||
assert ord >= 0;
|
||||
return deref(ord, bytesRef);
|
||||
}
|
||||
|
||||
protected void closeIndexInput() throws IOException {
|
||||
IOUtils.close(datIn, idxIn);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the largest doc id + 1 in this doc values source
|
||||
*/
|
||||
public int maxDoc() {
|
||||
return docToOrdIndex.size();
|
||||
}
|
||||
/**
|
||||
* Copies the value for the given ord to the given {@link BytesRef} and
|
||||
* returns it.
|
||||
*/
|
||||
protected abstract BytesRef deref(int ord, BytesRef bytesRef);
|
||||
|
||||
protected int binarySearch(BytesRef b, BytesRef bytesRef, int low,
|
||||
int high) {
|
||||
int mid = 0;
|
||||
while (low <= high) {
|
||||
mid = (low + high) >>> 1;
|
||||
deref(mid, bytesRef);
|
||||
final int cmp = comp.compare(bytesRef, b);
|
||||
if (cmp < 0) {
|
||||
low = mid + 1;
|
||||
} else if (cmp > 0) {
|
||||
high = mid - 1;
|
||||
} else {
|
||||
return mid;
|
||||
}
|
||||
}
|
||||
assert comp.compare(bytesRef, b) != 0;
|
||||
return -(low + 1);
|
||||
}
|
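binarySearch above probes the sorted value space by ordinal, dereferencing each midpoint into a scratch BytesRef and returning -(low + 1) when the key is absent, the same convention as java.util.Arrays.binarySearch. A self-contained sketch of the idea over fixed-width entries packed into a single byte[] (illustrative, not Lucene code; requires Java 9+ for Arrays.compare):

import java.util.Arrays;

public final class FixedWidthSearch {
  // Entries of width `size` are stored back to back in `data`, sorted ascending.
  // Returns the ord of `key`, or -(insertionPoint) - 1 if it is not present.
  static int binarySearch(byte[] data, int size, byte[] key) {
    int low = 0, high = data.length / size - 1;
    while (low <= high) {
      int mid = (low + high) >>> 1;
      byte[] probe = Arrays.copyOfRange(data, mid * size, mid * size + size); // deref(mid)
      int cmp = Arrays.compare(probe, key);
      if (cmp < 0) {
        low = mid + 1;
      } else if (cmp > 0) {
        high = mid - 1;
      } else {
        return mid;
      }
    }
    return -(low + 1);
  }

  public static void main(String[] args) {
    byte[] data = {1, 1, 2, 2, 3, 3};          // three 2-byte entries
    System.out.println(binarySearch(data, 2, new byte[] {2, 2})); // 1
    System.out.println(binarySearch(data, 2, new byte[] {2, 9})); // -3 (would insert at ord 2)
  }
}
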
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
|
||||
return new SourceEnum(attrSource, type(), this, maxDoc()) {
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= numDocs) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
while (source.getBytes(target, bytesRef).length == 0) {
|
||||
if (++target >= numDocs) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
return pos = target;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return type;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: open up this API?!
|
||||
static abstract class BytesWriterBase extends Writer {
|
||||
private final String id;
|
||||
private IndexOutput idxOut;
|
||||
private IndexOutput datOut;
|
||||
protected BytesRef bytesRef;
|
||||
protected BytesRef bytesRef = new BytesRef();
|
||||
private final Directory dir;
|
||||
private final String codecName;
|
||||
private final int version;
|
||||
|
@ -467,8 +292,8 @@ public final class Bytes {
|
|||
public abstract void finish(int docCount) throws IOException;
|
||||
|
||||
@Override
|
||||
protected void mergeDoc(int docID) throws IOException {
|
||||
add(docID, bytesRef);
|
||||
protected void mergeDoc(int docID, int sourceDoc) throws IOException {
|
||||
add(docID, currentMergeSource.getBytes(sourceDoc, bytesRef));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -479,11 +304,6 @@ public final class Bytes {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setNextEnum(ValuesEnum valuesEnum) {
|
||||
bytesRef = valuesEnum.bytes();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void files(Collection<String> files) throws IOException {
|
||||
assert datOut != null;
|
||||
|
@ -506,30 +326,34 @@ public final class Bytes {
|
|||
protected final IndexInput datIn;
|
||||
protected final int version;
|
||||
protected final String id;
|
||||
protected final ValueType type;
|
||||
|
||||
protected BytesReaderBase(Directory dir, String id, String codecName,
|
||||
int maxVersion, boolean doIndex, IOContext context) throws IOException {
|
||||
this.id = id;
|
||||
datIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
|
||||
Writer.DATA_EXTENSION), context);
|
||||
int maxVersion, boolean doIndex, IOContext context, ValueType type) throws IOException {
|
||||
IndexInput dataIn = null;
|
||||
IndexInput indexIn = null;
|
||||
boolean success = false;
|
||||
try {
|
||||
version = CodecUtil.checkHeader(datIn, codecName, maxVersion, maxVersion);
|
||||
if (doIndex) {
|
||||
idxIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
|
||||
Writer.INDEX_EXTENSION), context);
|
||||
final int version2 = CodecUtil.checkHeader(idxIn, codecName,
|
||||
maxVersion, maxVersion);
|
||||
assert version == version2;
|
||||
} else {
|
||||
idxIn = null;
|
||||
}
|
||||
success = true;
|
||||
dataIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
|
||||
Writer.DATA_EXTENSION), context);
|
||||
version = CodecUtil.checkHeader(dataIn, codecName, maxVersion, maxVersion);
|
||||
if (doIndex) {
|
||||
indexIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
|
||||
Writer.INDEX_EXTENSION), context);
|
||||
final int version2 = CodecUtil.checkHeader(indexIn, codecName,
|
||||
maxVersion, maxVersion);
|
||||
assert version == version2;
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
closeInternal();
|
||||
IOUtils.closeWhileHandlingException(dataIn, indexIn);
|
||||
}
|
||||
}
|
||||
datIn = dataIn;
|
||||
idxIn = indexIn;
|
||||
this.type = type;
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -553,23 +377,20 @@ public final class Bytes {
|
|||
try {
|
||||
super.close();
|
||||
} finally {
|
||||
closeInternal();
|
||||
IOUtils.close(datIn, idxIn);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return type;
|
||||
}
|
||||
|
||||
private void closeInternal() throws IOException {
|
||||
try {
|
||||
datIn.close();
|
||||
} finally {
|
||||
if (idxIn != null) {
|
||||
idxIn.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static abstract class DerefBytesWriterBase extends BytesWriterBase {
|
||||
protected int size = -1;
|
||||
protected int lastDocId = -1;
|
||||
protected int[] docToEntry;
|
||||
protected final BytesRefHash hash;
|
||||
|
||||
|
@ -608,17 +429,33 @@ public final class Bytes {
|
|||
return;
|
||||
}
|
||||
checkSize(bytes);
|
||||
fillDefault(docID);
|
||||
int ord = hash.add(bytes);
|
||||
if (ord < 0) {
|
||||
ord = (-ord) - 1;
|
||||
}
|
||||
|
||||
docToEntry[docID] = ord;
|
||||
lastDocId = docID;
|
||||
}
|
||||
|
||||
protected void fillDefault(int docID) {
|
||||
if (docID >= docToEntry.length) {
|
||||
final int size = docToEntry.length;
|
||||
docToEntry = ArrayUtil.grow(docToEntry, 1 + docID);
|
||||
bytesUsed.addAndGet((docToEntry.length - size)
|
||||
* RamUsageEstimator.NUM_BYTES_INT);
|
||||
}
|
||||
docToEntry[docID] = 1 + ord;
|
||||
assert size >= 0;
|
||||
BytesRef ref = new BytesRef(size);
|
||||
ref.length = size;
|
||||
int ord = hash.add(ref);
|
||||
if (ord < 0) {
|
||||
ord = (-ord) - 1;
|
||||
}
|
||||
for (int i = lastDocId+1; i < docID; i++) {
|
||||
docToEntry[i] = ord;
|
||||
}
|
||||
}
|
||||
|
||||
protected void checkSize(BytesRef bytes) {
|
||||
|
@ -713,77 +550,50 @@ public final class Bytes {
|
|||
|
||||
}
|
||||
|
||||
abstract static class DerefBytesEnumBase extends ValuesEnum {
|
||||
private final PackedInts.ReaderIterator idx;
|
||||
private final int valueCount;
|
||||
private int pos = -1;
|
||||
static abstract class BytesSortedSourceBase extends SortedSource {
|
||||
private final PagedBytes pagedBytes;
|
||||
|
||||
protected final PackedInts.Reader docToOrdIndex;
|
||||
protected final IndexInput datIn;
|
||||
protected final long fp;
|
||||
protected final int size;
|
||||
protected final IndexInput idxIn;
|
||||
protected final BytesRef defaultValue = new BytesRef();
|
||||
protected final static int PAGED_BYTES_BITS = 15;
|
||||
protected final PagedBytes.Reader data;
|
||||
|
||||
protected DerefBytesEnumBase(AttributeSource source, IndexInput datIn,
|
||||
IndexInput idxIn, int size, ValueType enumType) throws IOException {
|
||||
super(source, enumType);
|
||||
protected BytesSortedSourceBase(IndexInput datIn, IndexInput idxIn,
|
||||
Comparator<BytesRef> comp, long bytesToRead, ValueType type) throws IOException {
|
||||
this(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), bytesToRead, type);
|
||||
}
|
||||
|
||||
protected BytesSortedSourceBase(IndexInput datIn, IndexInput idxIn,
|
||||
Comparator<BytesRef> comp, PagedBytes pagedBytes, long bytesToRead,ValueType type)
|
||||
throws IOException {
|
||||
super(type, comp);
|
||||
assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: "
|
||||
+ (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
|
||||
this.datIn = datIn;
|
||||
this.size = size;
|
||||
idx = PackedInts.getReaderIterator(idxIn);
|
||||
fp = datIn.getFilePointer();
|
||||
if (size > 0) {
|
||||
bytesRef.grow(this.size);
|
||||
bytesRef.length = this.size;
|
||||
}
|
||||
bytesRef.offset = 0;
|
||||
valueCount = idx.size();
|
||||
}
|
||||
this.pagedBytes = pagedBytes;
|
||||
this.pagedBytes.copy(datIn, bytesToRead);
|
||||
data = pagedBytes.freeze(true);
|
||||
this.idxIn = idxIn;
|
||||
docToOrdIndex = PackedInts.getReader(idxIn);
|
||||
|
||||
protected void copyFrom(ValuesEnum valuesEnum) {
|
||||
bytesRef = valuesEnum.bytesRef;
|
||||
if (bytesRef.bytes.length < size) {
|
||||
bytesRef.grow(size);
|
||||
}
|
||||
bytesRef.length = size;
|
||||
bytesRef.offset = 0;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target < valueCount) {
|
||||
long address;
|
||||
while ((address = idx.advance(target)) == 0) {
|
||||
if (++target >= valueCount) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
pos = idx.ord();
|
||||
fill(address, bytesRef);
|
||||
return pos;
|
||||
}
|
||||
return pos = NO_MORE_DOCS;
|
||||
public int ord(int docID) {
|
||||
return (int) docToOrdIndex.get(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (pos >= valueCount) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
return advance(pos + 1);
|
||||
protected void closeIndexInput() throws IOException {
|
||||
IOUtils.close(datIn, idxIn);
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
try {
|
||||
datIn.close();
|
||||
} finally {
|
||||
idx.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the largest doc id + 1 in this doc values source
|
||||
*/
|
||||
public int maxDoc() {
|
||||
return docToOrdIndex.size();
|
||||
}
|
||||
|
||||
protected abstract void fill(long address, BytesRef ref) throws IOException;
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -0,0 +1,137 @@
package org.apache.lucene.index.values;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;

/**
 * Base class for disk resident source implementations
 * @lucene.internal
 */
abstract class DirectSource extends Source {

  protected final IndexInput data;
  private final ToNumeric toNumeric;
  protected final long baseOffset;

  DirectSource(IndexInput input, ValueType type) {
    super(type);
    this.data = input;
    baseOffset = input.getFilePointer();
    switch (type) {
    case FIXED_INTS_16:
      toNumeric = new ShortToLong();
      break;
    case FLOAT_32:
    case FIXED_INTS_32:
      toNumeric = new IntToLong();
      break;
    case FIXED_INTS_8:
      toNumeric = new ByteToLong();
      break;
    default:
      toNumeric = new LongToLong();
    }
  }

  @Override
  public BytesRef getBytes(int docID, BytesRef ref) {
    try {
      final int sizeToRead = position(docID);
      ref.grow(sizeToRead);
      data.readBytes(ref.bytes, 0, sizeToRead);
      ref.length = sizeToRead;
      ref.offset = 0;
      return ref;
    } catch (IOException ex) {
      throw new IllegalStateException("failed to get value for docID: " + docID, ex);
    }
  }

  @Override
  public long getInt(int docID) {
    try {
      position(docID);
      return toNumeric.toLong(data);
    } catch (IOException ex) {
      throw new IllegalStateException("failed to get value for docID: " + docID, ex);
    }
  }

  @Override
  public double getFloat(int docID) {
    try {
      position(docID);
      return toNumeric.toDouble(data);
    } catch (IOException ex) {
      throw new IllegalStateException("failed to get value for docID: " + docID, ex);
    }
  }

  protected abstract int position(int docID) throws IOException;

  private abstract static class ToNumeric {
    abstract long toLong(IndexInput input) throws IOException;

    double toDouble(IndexInput input) throws IOException {
      return toLong(input);
    }
  }

  private static final class ByteToLong extends ToNumeric {
    @Override
    long toLong(IndexInput input) throws IOException {
      return input.readByte();
    }
  }

  private static final class ShortToLong extends ToNumeric {
    @Override
    long toLong(IndexInput input) throws IOException {
      return input.readShort();
    }
  }

  private static final class IntToLong extends ToNumeric {
    @Override
    long toLong(IndexInput input) throws IOException {
      return input.readInt();
    }

    double toDouble(IndexInput input) throws IOException {
      return Float.intBitsToFloat(input.readInt());
    }
  }

  private static final class LongToLong extends ToNumeric {
    @Override
    long toLong(IndexInput input) throws IOException {
      return input.readLong();
    }

    double toDouble(IndexInput input) throws IOException {
      return Double.longBitsToDouble(input.readLong());
    }
  }

}

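A concrete DirectSource only has to implement position(docID): seek the shared IndexInput to the start of that document's value and report how many bytes to read. A minimal subclass sketch assuming a fixed-width straight layout, similar in spirit to the DirectFixedStraightSource added later in this patch (the class name here is hypothetical, same package and imports as DirectSource above):

// Hypothetical subclass for illustration only.
final class FixedWidthDirectSource extends DirectSource {
  private final int size; // bytes per document, assumed constant for this field

  FixedWidthDirectSource(IndexInput input, ValueType type, int size) {
    super(input, type);
    this.size = size;
  }

  @Override
  protected int position(int docID) throws IOException {
    // Every value starts at a predictable offset, so a single seek suffices.
    data.seek(baseOffset + (long) size * docID);
    return size;
  }
}
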
@ -20,16 +20,17 @@ package org.apache.lucene.index.values;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
|
||||
import org.apache.lucene.index.values.Bytes.DerefBytesSourceBase;
|
||||
import org.apache.lucene.index.values.Bytes.DerefBytesEnumBase;
|
||||
import org.apache.lucene.index.values.Bytes.BytesSourceBase;
|
||||
import org.apache.lucene.index.values.Bytes.DerefBytesWriterBase;
|
||||
import org.apache.lucene.index.values.DirectSource;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
// Stores fixed-length byte[] by deref, ie when two docs
|
||||
// have the same value, they store only 1 byte[]
|
||||
|
@ -66,63 +67,61 @@ class FixedDerefBytesImpl {
|
|||
}
|
||||
}
|
||||
|
||||
public static class Reader extends BytesReaderBase {
|
||||
public static class FixedDerefReader extends BytesReaderBase {
|
||||
private final int size;
|
||||
private final int numValuesStored;
|
||||
Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_START, true, context);
|
||||
FixedDerefReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_START, true, context, ValueType.BYTES_FIXED_DEREF);
|
||||
size = datIn.readInt();
|
||||
numValuesStored = idxIn.readInt();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Source load() throws IOException {
|
||||
return new Source(cloneData(), cloneIndex(), size, numValuesStored);
|
||||
}
|
||||
|
||||
private static final class Source extends DerefBytesSourceBase {
|
||||
private final int size;
|
||||
|
||||
protected Source(IndexInput datIn, IndexInput idxIn, int size, long numValues) throws IOException {
|
||||
super(datIn, idxIn, size * numValues, ValueType.BYTES_FIXED_DEREF);
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getBytes(int docID, BytesRef bytesRef) {
|
||||
final int id = (int) addresses.get(docID);
|
||||
if (id == 0) {
|
||||
bytesRef.length = 0;
|
||||
return bytesRef;
|
||||
}
|
||||
return data.fillSlice(bytesRef, ((id - 1) * size), size);
|
||||
}
|
||||
|
||||
return new FixedDerefSource(cloneData(), cloneIndex(), size, numValuesStored);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource source) throws IOException {
|
||||
return new DerefBytesEnum(source, cloneData(), cloneIndex(), size);
|
||||
public Source getDirectSource()
|
||||
throws IOException {
|
||||
return new DirectFixedDerefSource(cloneData(), cloneIndex(), size, type());
|
||||
}
|
||||
}
|
||||
|
||||
static final class FixedDerefSource extends BytesSourceBase {
|
||||
private final int size;
|
||||
private final PackedInts.Reader addresses;
|
||||
|
||||
final static class DerefBytesEnum extends DerefBytesEnumBase {
|
||||
|
||||
public DerefBytesEnum(AttributeSource source, IndexInput datIn,
|
||||
IndexInput idxIn, int size) throws IOException {
|
||||
super(source, datIn, idxIn, size, ValueType.BYTES_FIXED_DEREF);
|
||||
}
|
||||
|
||||
protected void fill(long address, BytesRef ref) throws IOException {
|
||||
datIn.seek(fp + ((address - 1) * size));
|
||||
datIn.readBytes(ref.bytes, 0, size);
|
||||
ref.length = size;
|
||||
ref.offset = 0;
|
||||
}
|
||||
protected FixedDerefSource(IndexInput datIn, IndexInput idxIn, int size, long numValues) throws IOException {
|
||||
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), size * numValues,
|
||||
ValueType.BYTES_FIXED_DEREF);
|
||||
this.size = size;
|
||||
addresses = PackedInts.getReader(idxIn);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.BYTES_FIXED_DEREF;
|
||||
public BytesRef getBytes(int docID, BytesRef bytesRef) {
|
||||
final int id = (int) addresses.get(docID);
|
||||
return data.fillSlice(bytesRef, (id * size), size);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
final static class DirectFixedDerefSource extends DirectSource {
|
||||
private final PackedInts.RandomAccessReaderIterator index;
|
||||
private final int size;
|
||||
|
||||
DirectFixedDerefSource(IndexInput data, IndexInput index, int size, ValueType type)
|
||||
throws IOException {
|
||||
super(data, type);
|
||||
this.size = size;
|
||||
this.index = PackedInts.getRandomAccessReaderIterator(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int position(int docID) throws IOException {
|
||||
data.seek(baseOffset + index.get(docID) * size);
|
||||
return size;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
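The deref variants above store each distinct value once and keep a packed docID-to-address table, so the direct source can compute a value's file offset from that slot. A self-contained sketch of the addressing scheme using plain arrays in place of Lucene's PagedBytes/PackedInts:

import java.util.Arrays;

public final class DerefDemo {
  // All distinct values, fixed width `size`, stored back to back in `data`.
  static byte[] valueFor(byte[] data, long[] docToAddress, int size, int docID) {
    int slot = (int) docToAddress[docID];              // which distinct value this doc uses
    int offset = slot * size;                          // its start offset in the value store
    return Arrays.copyOfRange(data, offset, offset + size);
  }

  public static void main(String[] args) {
    byte[] distinctValues = {10, 11, 20, 21};          // two distinct 2-byte values
    long[] docToAddress = {0, 1, 0, 1, 1};             // five docs share them
    System.out.println(Arrays.toString(valueFor(distinctValues, docToAddress, 2, 4))); // [20, 21]
  }
}
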
|
@ -23,14 +23,14 @@ import java.util.Comparator;
|
|||
import org.apache.lucene.index.values.Bytes.BytesSortedSourceBase;
|
||||
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
|
||||
import org.apache.lucene.index.values.Bytes.DerefBytesWriterBase;
|
||||
import org.apache.lucene.index.values.FixedDerefBytesImpl.Reader.DerefBytesEnum;
|
||||
import org.apache.lucene.index.values.IndexDocValues.SortedSource;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
// Stores fixed-length byte[] by deref, ie when two docs
|
||||
// have the same value, they store only 1 byte[]
|
||||
|
@ -44,7 +44,7 @@ class FixedSortedBytesImpl {
|
|||
static final int VERSION_START = 0;
|
||||
static final int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
static class Writer extends DerefBytesWriterBase {
|
||||
static final class Writer extends DerefBytesWriterBase {
|
||||
private final Comparator<BytesRef> comp;
|
||||
|
||||
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
|
||||
|
@ -57,9 +57,10 @@ class FixedSortedBytesImpl {
|
|||
// some last docs that we didn't see
|
||||
@Override
|
||||
public void finishInternal(int docCount) throws IOException {
|
||||
fillDefault(docCount);
|
||||
final IndexOutput datOut = getOrCreateDataOut();
|
||||
final int count = hash.size();
|
||||
final int[] address = new int[count+1]; // addr 0 is default values
|
||||
final int[] address = new int[count]; // addr 0 is default values
|
||||
datOut.writeInt(size);
|
||||
if (size != -1) {
|
||||
final int[] sortedEntries = hash.sort(comp);
|
||||
|
@ -70,7 +71,7 @@ class FixedSortedBytesImpl {
|
|||
final BytesRef bytes = hash.get(e, bytesRef);
|
||||
assert bytes.length == size;
|
||||
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
|
||||
address[e + 1] = 1 + i;
|
||||
address[e] = i;
|
||||
}
|
||||
}
|
||||
final IndexOutput idxOut = getOrCreateIndexOut();
|
||||
|
@ -79,65 +80,101 @@ class FixedSortedBytesImpl {
|
|||
}
|
||||
}
|
||||
|
||||
public static class Reader extends BytesReaderBase {
|
||||
static final class Reader extends BytesReaderBase {
|
||||
private final int size;
|
||||
private final int numValuesStored;
|
||||
private final int valueCount;
|
||||
private final Comparator<BytesRef> comparator;
|
||||
|
||||
public Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_START, true, context);
|
||||
public Reader(Directory dir, String id, int maxDoc, IOContext context,
|
||||
ValueType type, Comparator<BytesRef> comparator) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_START, true, context, type);
|
||||
size = datIn.readInt();
|
||||
numValuesStored = idxIn.readInt();
|
||||
valueCount = idxIn.readInt();
|
||||
this.comparator = comparator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public org.apache.lucene.index.values.IndexDocValues.Source load()
|
||||
public Source load() throws IOException {
|
||||
return new FixedSortedSource(cloneData(), cloneIndex(), size,
|
||||
valueCount, comparator);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Source getDirectSource() throws IOException {
|
||||
return new DirectFixedSortedSource(cloneData(), cloneIndex(), size,
|
||||
valueCount, comparator, type);
|
||||
}
|
||||
}
|
||||
|
||||
static final class FixedSortedSource extends BytesSortedSourceBase {
|
||||
private final int valueCount;
|
||||
private final int size;
|
||||
|
||||
FixedSortedSource(IndexInput datIn, IndexInput idxIn, int size,
|
||||
int numValues, Comparator<BytesRef> comp) throws IOException {
|
||||
super(datIn, idxIn, comp, size * numValues, ValueType.BYTES_FIXED_SORTED);
|
||||
this.size = size;
|
||||
this.valueCount = numValues;
|
||||
closeIndexInput();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
return valueCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
|
||||
return data.fillSlice(bytesRef, (ord * size), size);
|
||||
}
|
||||
}
|
||||
|
||||
static final class DirectFixedSortedSource extends SortedSource {
|
||||
final PackedInts.RandomAccessReaderIterator docToOrdIndex;
|
||||
private final IndexInput datIn;
|
||||
private final long basePointer;
|
||||
private final int size;
|
||||
private final int valueCount;
|
||||
|
||||
DirectFixedSortedSource(IndexInput datIn, IndexInput idxIn, int size,
|
||||
int valueCount, Comparator<BytesRef> comp, ValueType type)
|
||||
throws IOException {
|
||||
return loadSorted(null);
|
||||
super(type, comp);
|
||||
docToOrdIndex = PackedInts.getRandomAccessReaderIterator(idxIn);
|
||||
basePointer = datIn.getFilePointer();
|
||||
this.datIn = datIn;
|
||||
this.size = size;
|
||||
this.valueCount = valueCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedSource loadSorted(Comparator<BytesRef> comp)
|
||||
throws IOException {
|
||||
return new Source(cloneData(), cloneIndex(), size, numValuesStored, comp);
|
||||
}
|
||||
|
||||
private static class Source extends BytesSortedSourceBase {
|
||||
private final int valueCount;
|
||||
private final int size;
|
||||
|
||||
public Source(IndexInput datIn, IndexInput idxIn, int size,
|
||||
int numValues, Comparator<BytesRef> comp) throws IOException {
|
||||
super(datIn, idxIn, comp, size * numValues, ValueType.BYTES_FIXED_SORTED);
|
||||
this.size = size;
|
||||
this.valueCount = numValues;
|
||||
closeIndexInput();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getByValue(BytesRef bytes, BytesRef tmpRef) {
|
||||
return binarySearch(bytes, tmpRef, 0, valueCount - 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
return valueCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BytesRef deref(int ord, BytesRef bytesRef) {
|
||||
return data.fillSlice(bytesRef, (ord * size), size);
|
||||
public int ord(int docID) {
|
||||
try {
|
||||
return (int) docToOrdIndex.get(docID);
|
||||
} catch (IOException e) {
|
||||
throw new IllegalStateException("failed to get ord", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource source) throws IOException {
|
||||
// do unsorted
|
||||
return new DerefBytesEnum(source, cloneData(), cloneIndex(), size);
|
||||
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
|
||||
try {
|
||||
datIn.seek(basePointer + size * ord);
|
||||
if (bytesRef.bytes.length < size) {
|
||||
bytesRef.grow(size);
|
||||
}
|
||||
datIn.readBytes(bytesRef.bytes, 0, size);
|
||||
bytesRef.length = size;
|
||||
bytesRef.offset = 0;
|
||||
return bytesRef;
|
||||
} catch (IOException ex) {
|
||||
throw new IllegalStateException("failed to getByOrd", ex);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.BYTES_FIXED_SORTED;
|
||||
public int getValueCount() {
|
||||
return valueCount;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,11 +24,12 @@ import java.io.IOException;
|
|||
import org.apache.lucene.index.values.Bytes.BytesSourceBase;
|
||||
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
|
||||
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
|
||||
import org.apache.lucene.index.values.DirectSource;
|
||||
import org.apache.lucene.index.values.IndexDocValues.Source;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.ByteBlockPool;
|
||||
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -137,8 +138,8 @@ class FixedStraightBytesImpl {
|
|||
datOut = getOrCreateDataOut();
|
||||
boolean success = false;
|
||||
try {
|
||||
if (state.liveDocs == null && state.reader instanceof Reader ) {
|
||||
Reader reader = (Reader) state.reader;
|
||||
if (state.liveDocs == null && state.reader instanceof FixedStraightReader ) {
|
||||
FixedStraightReader reader = (FixedStraightReader) state.reader;
|
||||
final int maxDocs = reader.maxDoc;
|
||||
if (maxDocs == 0) {
|
||||
return;
|
||||
|
@ -175,8 +176,9 @@ class FixedStraightBytesImpl {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected void mergeDoc(int docID) throws IOException {
|
||||
protected void mergeDoc(int docID, int sourceDoc) throws IOException {
|
||||
assert lastDocID < docID;
|
||||
currentMergeSource.getBytes(sourceDoc, bytesRef);
|
||||
if (size == -1) {
|
||||
size = bytesRef.length;
|
||||
datOut.writeInt(size);
|
||||
|
@ -236,16 +238,16 @@ class FixedStraightBytesImpl {
|
|||
|
||||
}
|
||||
|
||||
public static class Reader extends BytesReaderBase {
|
||||
public static class FixedStraightReader extends BytesReaderBase {
|
||||
protected final int size;
|
||||
protected final int maxDoc;
|
||||
|
||||
Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
|
||||
this(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context);
|
||||
FixedStraightReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
|
||||
this(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context, ValueType.BYTES_FIXED_STRAIGHT);
|
||||
}
|
||||
|
||||
protected Reader(Directory dir, String id, String codec, int version, int maxDoc, IOContext context) throws IOException {
|
||||
super(dir, id, codec, version, false, context);
|
||||
protected FixedStraightReader(Directory dir, String id, String codec, int version, int maxDoc, IOContext context, ValueType type) throws IOException {
|
||||
super(dir, id, codec, version, false, context, type);
|
||||
size = datIn.readInt();
|
||||
this.maxDoc = maxDoc;
|
||||
}
|
||||
|
@ -253,155 +255,83 @@ class FixedStraightBytesImpl {
|
|||
@Override
|
||||
public Source load() throws IOException {
|
||||
return size == 1 ? new SingleByteSource(cloneData(), maxDoc) :
|
||||
new StraightBytesSource(cloneData(), size, maxDoc);
|
||||
new FixedStraightSource(cloneData(), size, maxDoc, type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
datIn.close();
|
||||
}
|
||||
|
||||
// specialized version for single bytes
|
||||
private static class SingleByteSource extends Source {
|
||||
private final int maxDoc;
|
||||
private final byte[] data;
|
||||
|
||||
public SingleByteSource(IndexInput datIn, int maxDoc) throws IOException {
|
||||
this.maxDoc = maxDoc;
|
||||
try {
|
||||
data = new byte[maxDoc];
|
||||
datIn.readBytes(data, 0, data.length, false);
|
||||
} finally {
|
||||
IOUtils.close(datIn);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getBytes(int docID, BytesRef bytesRef) {
|
||||
bytesRef.length = 1;
|
||||
bytesRef.bytes = data;
|
||||
bytesRef.offset = docID;
|
||||
return bytesRef;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.BYTES_FIXED_STRAIGHT;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
|
||||
return new SourceEnum(attrSource, type(), this, maxDoc) {
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= numDocs) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
bytesRef.length = 1;
|
||||
bytesRef.bytes = data;
|
||||
bytesRef.offset = target;
|
||||
return pos = target;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private final static class StraightBytesSource extends BytesSourceBase {
|
||||
private final int size;
|
||||
private final int maxDoc;
|
||||
|
||||
public StraightBytesSource(IndexInput datIn, int size, int maxDoc)
|
||||
throws IOException {
|
||||
super(datIn, null, new PagedBytes(PAGED_BYTES_BITS), size * maxDoc, ValueType.BYTES_FIXED_STRAIGHT);
|
||||
this.size = size;
|
||||
this.maxDoc = maxDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getBytes(int docID, BytesRef bytesRef) {
|
||||
return data.fillSlice(bytesRef, docID * size, size);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
return maxDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int maxDoc() {
|
||||
return maxDoc;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource source) throws IOException {
|
||||
return new FixedStraightBytesEnum(source, cloneData(), size, maxDoc);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.BYTES_FIXED_STRAIGHT;
|
||||
public Source getDirectSource() throws IOException {
|
||||
return new DirectFixedStraightSource(cloneData(), size, type());
|
||||
}
|
||||
}
|
||||
|
||||
static class FixedStraightBytesEnum extends ValuesEnum {
|
||||
private final IndexInput datIn;
|
||||
private final int size;
|
||||
private final int maxDoc;
|
||||
private int pos = -1;
|
||||
private final long fp;
|
||||
// specialized version for single bytes
|
||||
private static final class SingleByteSource extends Source {
|
||||
private final byte[] data;
|
||||
|
||||
public FixedStraightBytesEnum(AttributeSource source, IndexInput datIn,
|
||||
int size, int maxDoc) throws IOException {
|
||||
super(source, ValueType.BYTES_FIXED_STRAIGHT);
|
||||
this.datIn = datIn;
|
||||
this.size = size;
|
||||
this.maxDoc = maxDoc;
|
||||
bytesRef.grow(size);
|
||||
bytesRef.length = size;
|
||||
bytesRef.offset = 0;
|
||||
fp = datIn.getFilePointer();
|
||||
}
|
||||
|
||||
protected void copyFrom(ValuesEnum valuesEnum) {
|
||||
super.copyFrom(valuesEnum);
|
||||
if (bytesRef.bytes.length < size) {
|
||||
bytesRef.grow(size);
|
||||
public SingleByteSource(IndexInput datIn, int maxDoc) throws IOException {
|
||||
super(ValueType.BYTES_FIXED_STRAIGHT);
|
||||
try {
|
||||
data = new byte[maxDoc];
|
||||
datIn.readBytes(data, 0, data.length, false);
|
||||
} finally {
|
||||
IOUtils.close(datIn);
|
||||
}
|
||||
bytesRef.length = size;
|
||||
bytesRef.offset = 0;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
datIn.close();
|
||||
|
||||
@Override
|
||||
public boolean hasArray() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= maxDoc || size == 0) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
if ((target - 1) != pos) // pos inc == 1
|
||||
datIn.seek(fp + target * size);
|
||||
datIn.readBytes(bytesRef.bytes, 0, size);
|
||||
return pos = target;
|
||||
public Object getArray() {
|
||||
return data;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (pos >= maxDoc) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
return advance(pos + 1);
|
||||
public BytesRef getBytes(int docID, BytesRef bytesRef) {
|
||||
bytesRef.length = 1;
|
||||
bytesRef.bytes = data;
|
||||
bytesRef.offset = docID;
|
||||
return bytesRef;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private final static class FixedStraightSource extends BytesSourceBase {
|
||||
private final int size;
|
||||
|
||||
public FixedStraightSource(IndexInput datIn, int size, int maxDoc, ValueType type)
|
||||
throws IOException {
|
||||
super(datIn, null, new PagedBytes(PAGED_BYTES_BITS), size * maxDoc,
|
||||
type);
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getBytes(int docID, BytesRef bytesRef) {
|
||||
return data.fillSlice(bytesRef, docID * size, size);
|
||||
}
|
||||
}
|
||||
|
||||
public final static class DirectFixedStraightSource extends DirectSource {
|
||||
private final int size;
|
||||
|
||||
DirectFixedStraightSource(IndexInput input, int size, ValueType type) {
|
||||
super(input, type);
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int position(int docID) throws IOException {
|
||||
data.seek(baseOffset + size * docID);
|
||||
return size;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
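The SingleByteSource above special-cases size == 1: the whole column is loaded into one byte[maxDoc], and getBytes just points the returned BytesRef at that shared array with offset = docID, so no bytes are copied per lookup. A standalone sketch of that zero-copy pattern (ByteSlice is an illustrative stand-in for BytesRef):

public final class SingleByteDemo {
  // Minimal stand-in for BytesRef: a view into a shared byte[].
  static final class ByteSlice {
    byte[] bytes;
    int offset;
    int length;
  }

  // One byte per document, loaded once; lookups only adjust the view.
  static ByteSlice get(byte[] column, int docID, ByteSlice reuse) {
    reuse.bytes = column;   // share the backing array, never copy
    reuse.offset = docID;   // the doc's single byte lives at index docID
    reuse.length = 1;
    return reuse;
  }

  public static void main(String[] args) {
    byte[] column = {7, 8, 9};
    ByteSlice slice = get(column, 2, new ByteSlice());
    System.out.println(slice.bytes[slice.offset]); // 9
  }
}
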
|
|
@ -22,9 +22,9 @@ import org.apache.lucene.index.values.IndexDocValues.Source;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
* Exposes {@link Writer} and reader ({@link Source}) for 32 bit and 64 bit
|
||||
|
@ -37,37 +37,47 @@ import org.apache.lucene.util.Counter;
|
|||
*/
|
||||
public class Floats {
|
||||
|
||||
public static Writer getWriter(Directory dir, String id, int precisionBytes,
|
||||
Counter bytesUsed, IOContext context) throws IOException {
|
||||
if (precisionBytes != 4 && precisionBytes != 8) {
|
||||
throw new IllegalArgumentException("precisionBytes must be 4 or 8; got "
|
||||
+ precisionBytes);
|
||||
}
|
||||
return new FloatsWriter(dir, id, bytesUsed, context, precisionBytes);
|
||||
|
||||
protected static final String CODEC_NAME = "Floats";
|
||||
protected static final int VERSION_START = 0;
|
||||
protected static final int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
public static Writer getWriter(Directory dir, String id, Counter bytesUsed,
|
||||
IOContext context, ValueType type) throws IOException {
|
||||
return new FloatsWriter(dir, id, bytesUsed, context, type);
|
||||
}
|
||||
|
||||
public static IndexDocValues getValues(Directory dir, String id, int maxDoc, IOContext context)
|
||||
public static IndexDocValues getValues(Directory dir, String id, int maxDoc, IOContext context, ValueType type)
|
||||
throws IOException {
|
||||
return new FloatsReader(dir, id, maxDoc, context);
|
||||
return new FloatsReader(dir, id, maxDoc, context, type);
|
||||
}
|
||||
|
||||
private static int typeToSize(ValueType type) {
|
||||
switch (type) {
|
||||
case FLOAT_32:
|
||||
return 4;
|
||||
case FLOAT_64:
|
||||
return 8;
|
||||
default:
|
||||
throw new IllegalStateException("illegal type " + type);
|
||||
}
|
||||
}
|
||||
|
||||
final static class FloatsWriter extends FixedStraightBytesImpl.Writer {
|
||||
|
||||
private final int size;
|
||||
private final IndexDocValuesArray template;
|
||||
public FloatsWriter(Directory dir, String id, Counter bytesUsed,
|
||||
IOContext context, int size) throws IOException {
|
||||
super(dir, id, bytesUsed, context);
|
||||
IOContext context, ValueType type) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
|
||||
size = typeToSize(type);
|
||||
this.bytesRef = new BytesRef(size);
|
||||
this.size = size;
|
||||
bytesRef.length = size;
|
||||
template = IndexDocValuesArray.TEMPLATES.get(type);
|
||||
assert template != null;
|
||||
}
|
||||
|
||||
public void add(int docID, double v) throws IOException {
|
||||
if (size == 8) {
|
||||
bytesRef.copy(Double.doubleToRawLongBits(v));
|
||||
} else {
|
||||
bytesRef.copy(Float.floatToRawIntBits((float)v));
|
||||
}
|
||||
template.toBytes(v, bytesRef);
|
||||
add(docID, bytesRef);
|
||||
}
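
Both variants of add ultimately store the value as its raw IEEE 754 bit pattern (4 bytes for FLOAT_32, 8 bytes for FLOAT_64), which the reader later reverses with Float.intBitsToFloat and Double.longBitsToDouble. A standalone sketch of that round trip:

// Standalone sketch of the raw-bits round trip the float writer and reader rely on.
public class FloatBitsRoundTripExample {
  public static void main(String[] args) {
    double v = 3.75d;
    long bits64 = Double.doubleToRawLongBits(v);       // 8 byte encoding (FLOAT_64)
    int bits32 = Float.floatToRawIntBits((float) v);   // 4 byte encoding (FLOAT_32)
    double back64 = Double.longBitsToDouble(bits64);
    float back32 = Float.intBitsToFloat(bits32);
    System.out.println(back64 + " " + back32);         // 3.75 3.75
  }
}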
|
||||
|
||||
|
@ -76,19 +86,14 @@ public class Floats {
|
|||
add(docID, docValues.getFloat());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
final static class FloatsReader extends FixedStraightBytesImpl.Reader {
|
||||
final static class FloatsReader extends FixedStraightBytesImpl.FixedStraightReader {
|
||||
final IndexDocValuesArray arrayTemplate;
|
||||
FloatsReader(Directory dir, String id, int maxDoc, IOContext context)
|
||||
FloatsReader(Directory dir, String id, int maxDoc, IOContext context, ValueType type)
|
||||
throws IOException {
|
||||
super(dir, id, maxDoc, context);
|
||||
super(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context, type);
|
||||
arrayTemplate = IndexDocValuesArray.TEMPLATES.get(type);
|
||||
assert size == 4 || size == 8;
|
||||
if (size == 4) {
|
||||
arrayTemplate = new IndexDocValuesArray.FloatValues();
|
||||
} else {
|
||||
arrayTemplate = new IndexDocValuesArray.DoubleValues();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -97,19 +102,10 @@ public class Floats {
|
|||
try {
|
||||
return arrayTemplate.newFromInput(indexInput, maxDoc);
|
||||
} finally {
|
||||
indexInput.close();
|
||||
IOUtils.close(indexInput);
|
||||
}
|
||||
}
|
||||
|
||||
public ValuesEnum getEnum(AttributeSource source) throws IOException {
|
||||
IndexInput indexInput = (IndexInput) datIn.clone();
|
||||
return arrayTemplate.getDirectEnum(source, indexInput, maxDoc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return arrayTemplate.type();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -26,7 +26,6 @@ import org.apache.lucene.index.FieldsEnum;
|
|||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.codecs.Codec;
|
||||
import org.apache.lucene.index.codecs.CodecProvider;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
|
@ -34,8 +33,8 @@ import org.apache.lucene.util.BytesRef;
|
|||
* value access based on the lucene internal document id. {@link IndexDocValues}
|
||||
* exposes two distinct APIs:
|
||||
* <ul>
|
||||
* <li>via {@link Source} an entirely RAM resident API for random access</li>
|
||||
* <li>via {@link ValuesEnum} a disk resident API for sequential access</li>
|
||||
* <li>via {@link #getSource()} providing RAM resident random access</li>
|
||||
* <li>via {@link #getDirectSource()} providing on disk random access</li>
|
||||
* </ul> {@link IndexDocValues} are exposed via
|
||||
* {@link IndexReader#perDocValues()} on a per-segment basis. For best
|
||||
* performance {@link IndexDocValues} should be consumed per-segment just like
|
||||
|
@ -52,47 +51,18 @@ import org.apache.lucene.util.BytesRef;
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class IndexDocValues implements Closeable {
|
||||
/*
|
||||
* TODO: it might be useful to add another Random Access enum for some
|
||||
* implementations like packed ints and only return such a random access enum
|
||||
* if the impl supports random access. For super large segments it might be
|
||||
* useful or even required in certain environments to have disk based random
|
||||
* access
|
||||
*/
|
||||
|
||||
public static final IndexDocValues[] EMPTY_ARRAY = new IndexDocValues[0];
|
||||
|
||||
private SourceCache cache = new SourceCache.DirectSourceCache();
|
||||
|
||||
/**
|
||||
* Returns an iterator that steps through all documents values for this
|
||||
* {@link IndexDocValues} field instance. {@link ValuesEnum} will skip documents
|
||||
* without a value if applicable.
|
||||
*/
|
||||
public ValuesEnum getEnum() throws IOException {
|
||||
return getEnum(null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an iterator that steps through all documents values for this
|
||||
* {@link IndexDocValues} field instance. {@link ValuesEnum} will skip documents
|
||||
* without a value if applicable.
|
||||
* <p>
|
||||
* If an {@link AttributeSource} is supplied to this method the
|
||||
* {@link ValuesEnum} will use the given source to access implementation
|
||||
* related attributes.
|
||||
*/
|
||||
public abstract ValuesEnum getEnum(AttributeSource attrSource)
|
||||
throws IOException;
|
||||
|
||||
private volatile SourceCache cache = new SourceCache.DirectSourceCache();
|
||||
private final Object cacheLock = new Object();
|
||||
|
||||
/**
|
||||
* Loads a new {@link Source} instance for this {@link IndexDocValues} field
|
||||
* instance. Source instances returned from this method are not cached. It is
|
||||
* the caller's responsibility to maintain the instance and release its
|
||||
* resources once the source is not needed anymore.
|
||||
* <p>
|
||||
* This method will return null iff this {@link IndexDocValues} represents a
|
||||
* {@link SortedSource}.
|
||||
* <p>
|
||||
* For managed {@link Source} instances see {@link #getSource()}.
|
||||
*
|
||||
* @see #getSource()
|
||||
|
@ -111,62 +81,17 @@ public abstract class IndexDocValues implements Closeable {
|
|||
* from the cache once this {@link IndexDocValues} instance is closed by the
|
||||
* {@link IndexReader}, {@link Fields} or {@link FieldsEnum} the
|
||||
* {@link IndexDocValues} was created from.
|
||||
* <p>
|
||||
* This method will return null iff this {@link IndexDocValues} represents a
|
||||
* {@link SortedSource}.
|
||||
*/
|
||||
public Source getSource() throws IOException {
|
||||
return cache.load(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a {@link SortedSource} instance for this {@link IndexDocValues} field
|
||||
* instance like {@link #getSource()}.
|
||||
* <p>
|
||||
* This method will return null iff this {@link IndexDocValues} represents a
|
||||
* {@link Source} instead of a {@link SortedSource}.
|
||||
* Returns a disk resident {@link Source} instance. Direct Sources are not
|
||||
* cached in the {@link SourceCache} and should not be shared between threads.
|
||||
*/
|
||||
public SortedSource getSortedSorted(Comparator<BytesRef> comparator)
|
||||
throws IOException {
|
||||
return cache.loadSorted(this, comparator);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a {@link SortedSource} instance using a default {@link BytesRef}
|
||||
* comparator for this {@link IndexDocValues} field instance like
|
||||
* {@link #getSource()}.
|
||||
* <p>
|
||||
* This method will return null iff this {@link IndexDocValues} represents a
|
||||
* {@link Source} instead of a {@link SortedSource}.
|
||||
*/
|
||||
public SortedSource getSortedSorted() throws IOException {
|
||||
return getSortedSorted(null);
|
||||
}
|
||||
public abstract Source getDirectSource() throws IOException;
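
To make the two access paths concrete, here is a minimal usage sketch. It is not part of the patch and assumes an IndexDocValues instance for an integer-typed field has already been obtained, for example from a per-segment reader:

import java.io.IOException;
import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.index.values.IndexDocValues.Source;

// Minimal sketch: the cached, RAM resident Source is loaded once and can be shared,
// while a direct Source reads from disk and should stay private to one thread.
class DocValuesAccessExample {
  static long sumCached(IndexDocValues values, int maxDoc) throws IOException {
    Source source = values.getSource();        // loaded via the SourceCache, RAM resident
    long sum = 0;
    for (int docID = 0; docID < maxDoc; docID++) {
      sum += source.getInt(docID);             // random access by document id
    }
    return sum;
  }

  static long sumDirect(IndexDocValues values, int maxDoc) throws IOException {
    Source source = values.getDirectSource();  // disk resident, not cached, per thread
    long sum = 0;
    for (int docID = 0; docID < maxDoc; docID++) {
      sum += source.getInt(docID);
    }
    return sum;
  }
}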
|
||||
|
||||
/**
|
||||
* Loads and returns a {@link SortedSource} instance for this
|
||||
* {@link IndexDocValues} field instance like {@link #load()}.
|
||||
* <p>
|
||||
* This method will return null iff this {@link IndexDocValues} represents a
|
||||
* {@link Source} instead of a {@link SortedSource}.
|
||||
*/
|
||||
public SortedSource loadSorted(Comparator<BytesRef> comparator)
|
||||
throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads and returns a {@link SortedSource} instance using a default
|
||||
* {@link BytesRef} comparator for this {@link IndexDocValues} field instance
|
||||
* like {@link #load()}.
|
||||
* <p>
|
||||
* This method will return null iff this {@link IndexDocValues} represents a
|
||||
* {@link Source} instead of a {@link SortedSource}.
|
||||
*/
|
||||
public SortedSource loadSorted() throws IOException {
|
||||
return loadSorted(null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link ValueType} of this {@link IndexDocValues} instance
|
||||
*/
|
||||
|
@ -183,13 +108,10 @@ public abstract class IndexDocValues implements Closeable {
|
|||
|
||||
/**
|
||||
* Sets the {@link SourceCache} used by this {@link IndexDocValues} instance. This
|
||||
* method should be called before {@link #load()} or
|
||||
* {@link #loadSorted(Comparator)} is called. All {@link Source} or
|
||||
* {@link SortedSource} instances in the currently used cache will be closed
|
||||
* method should be called before {@link #load()} is called. All {@link Source} instances in the currently used cache will be closed
|
||||
* before the new cache is installed.
|
||||
* <p>
|
||||
* Note: All instances previously obtained from {@link #load()} or
|
||||
* {@link #loadSorted(Comparator)} will be closed.
|
||||
* Note: All instances previously obtained from {@link #load()} will be lost.
|
||||
*
|
||||
* @throws IllegalArgumentException
|
||||
* if the given cache is <code>null</code>
|
||||
|
@ -198,9 +120,10 @@ public abstract class IndexDocValues implements Closeable {
|
|||
public void setCache(SourceCache cache) {
|
||||
if (cache == null)
|
||||
throw new IllegalArgumentException("cache must not be null");
|
||||
synchronized (this.cache) {
|
||||
this.cache.close(this);
|
||||
synchronized (cacheLock) {
|
||||
SourceCache toClose = this.cache;
|
||||
this.cache = cache;
|
||||
toClose.close(this);
|
||||
}
|
||||
}
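
The rewritten body swaps the cache reference under a dedicated lock and only then closes the previous cache, so readers of the volatile field always see a usable instance. A generic sketch of the same swap-then-close idiom, using an illustrative Cache type rather than SourceCache:

// Generic sketch of the swap-then-close idiom used by setCache (illustrative Cache type).
class SwapThenCloseExample {
  interface Cache { void close(); }

  private final Object lock = new Object();
  private volatile Cache cache;

  void replace(Cache newCache) {
    if (newCache == null) {
      throw new IllegalArgumentException("cache must not be null");
    }
    synchronized (lock) {
      Cache toClose = cache;     // remember the old cache
      cache = newCache;          // publish the new one before closing the old
      if (toClose != null) {
        toClose.close();         // release whatever the old cache held
      }
    }
  }
}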
|
||||
|
||||
|
@ -208,12 +131,17 @@ public abstract class IndexDocValues implements Closeable {
|
|||
* Source of per document values like long, double or {@link BytesRef}
|
||||
* depending on the {@link IndexDocValues} field's {@link ValueType}. Source
|
||||
* implementations provide random access semantics similar to array lookups
|
||||
* and typically are entirely memory resident.
|
||||
* <p>
|
||||
* {@link Source} defines 3 {@link ValueType} //TODO finish this
|
||||
* @see IndexDocValues#getSource()
|
||||
* @see IndexDocValues#getDirectSource()
|
||||
*/
|
||||
public static abstract class Source {
|
||||
|
||||
protected final ValueType type;
|
||||
|
||||
protected Source(ValueType type) {
|
||||
this.type = type;
|
||||
}
|
||||
/**
|
||||
* Returns a <tt>long</tt> for the given document id or throws an
|
||||
* {@link UnsupportedOperationException} if this source doesn't support
|
||||
|
@ -242,6 +170,7 @@ public abstract class IndexDocValues implements Closeable {
|
|||
* Returns a {@link BytesRef} for the given document id or throws an
|
||||
* {@link UnsupportedOperationException} if this source doesn't support
|
||||
* <tt>byte[]</tt> values.
|
||||
* @throws IOException
|
||||
*
|
||||
* @throws UnsupportedOperationException
|
||||
* if this source doesn't support <tt>byte[]</tt> values.
|
||||
|
@ -250,35 +179,15 @@ public abstract class IndexDocValues implements Closeable {
|
|||
throw new UnsupportedOperationException("bytes are not supported");
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of unique values. Some implementations may throw
|
||||
* UnsupportedOperationException.
|
||||
*/
|
||||
public int getValueCount() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a {@link ValuesEnum} for this source.
|
||||
*/
|
||||
public ValuesEnum getEnum() throws IOException {
|
||||
return getEnum(null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link ValueType} of this source.
|
||||
*
|
||||
* @return the {@link ValueType} of this source.
|
||||
*/
|
||||
public abstract ValueType type();
|
||||
public ValueType type() {
|
||||
return type;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a {@link ValuesEnum} for this source which uses the given
|
||||
* {@link AttributeSource}.
|
||||
*/
|
||||
public abstract ValuesEnum getEnum(AttributeSource attrSource)
|
||||
throws IOException;
|
||||
|
||||
/**
|
||||
* Returns <code>true</code> iff this {@link Source} exposes an array via
|
||||
* {@link #getArray()} otherwise <code>false</code>.
|
||||
|
@ -297,61 +206,29 @@ public abstract class IndexDocValues implements Closeable {
|
|||
public Object getArray() {
|
||||
return null;
|
||||
}
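
hasArray and getArray let callers shortcut to the backing array when a Source has one; the concrete array type depends on the field's ValueType. A minimal, hedged sketch of that check:

import org.apache.lucene.index.values.IndexDocValues.Source;

// Sketch: use the backing array when the Source exposes one, otherwise fall back to null.
class ArrayAccessExample {
  static Object backingArrayOrNull(Source source) {
    // the returned object is e.g. a long[], float[] or byte[] depending on the ValueType
    return source.hasArray() ? source.getArray() : null;
  }
}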
|
||||
}
|
||||
|
||||
/**
|
||||
* {@link ValuesEnum} utility for {@link Source} implementations.
|
||||
*
|
||||
*/
|
||||
public abstract static class SourceEnum extends ValuesEnum {
|
||||
protected final Source source;
|
||||
protected final int numDocs;
|
||||
protected int pos = -1;
|
||||
|
||||
|
||||
/**
|
||||
* Creates a new {@link SourceEnum}
|
||||
*
|
||||
* @param attrs
|
||||
* the {@link AttributeSource} for this enum
|
||||
* @param type
|
||||
* the enums {@link ValueType}
|
||||
* @param source
|
||||
* the source this enum operates on
|
||||
* @param numDocs
|
||||
* the number of documents within the source
|
||||
* If this {@link Source} is sorted, this method will return an instance of
|
||||
* {@link SortedSource} otherwise <code>null</code>
|
||||
*/
|
||||
protected SourceEnum(AttributeSource attrs, ValueType type, Source source,
|
||||
int numDocs) {
|
||||
super(attrs, type);
|
||||
this.source = source;
|
||||
this.numDocs = numDocs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (pos == NO_MORE_DOCS)
|
||||
return NO_MORE_DOCS;
|
||||
return advance(pos + 1);
|
||||
public SortedSource asSortedSource() {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A sorted variant of {@link Source} for <tt>byte[]</tt> values per document.
|
||||
* <p>
|
||||
* Note: {@link ValuesEnum} obtained from a {@link SortedSource} will
|
||||
* enumerate values in document order and not in sorted order.
|
||||
*/
|
||||
public static abstract class SortedSource extends Source {
|
||||
|
||||
private final Comparator<BytesRef> comparator;
|
||||
|
||||
protected SortedSource(ValueType type, Comparator<BytesRef> comparator) {
|
||||
super(type);
|
||||
this.comparator = comparator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getBytes(int docID, BytesRef bytesRef) {
|
||||
final int ord = ord(docID);
|
||||
|
@ -364,8 +241,7 @@ public abstract class IndexDocValues implements Closeable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns ord for specified docID. If this docID had not been added to the
|
||||
* Writer, the ord is 0. Ord is dense, ie, starts at 0, then increments by 1
|
||||
* Returns ord for specified docID. Ord is dense, ie, starts at 0, then increments by 1
|
||||
* for the next (as defined by the {@link Comparator}) value.
|
||||
*/
|
||||
public abstract int ord(int docID);
|
||||
|
@ -373,28 +249,13 @@ public abstract class IndexDocValues implements Closeable {
|
|||
/** Returns value for specified ord. */
|
||||
public abstract BytesRef getByOrd(int ord, BytesRef bytesRef);
|
||||
|
||||
|
||||
/**
|
||||
* Finds the ordinal whose value is greater or equal to the given value.
|
||||
*
|
||||
* @return the given values ordinal if found or otherwise
|
||||
* <code>(-(ord)-1)</code>, defined as the ordinal of the first
|
||||
* element that is greater than the given value. This guarantees
|
||||
* that the return value will always be >= 0 if the given value
|
||||
* is found.
|
||||
*
|
||||
*/
|
||||
public final int getByValue(BytesRef value) {
|
||||
return getByValue(value, new BytesRef());
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a lookup by value.
|
||||
*
|
||||
* @param value
|
||||
* the value to look up
|
||||
* @param tmpRef
|
||||
* a temporary {@link BytesRef} instance used to compare internal
|
||||
* @param spare
|
||||
* a spare {@link BytesRef} instance used to compare internal
|
||||
* values to the given value. Must not be <code>null</code>
|
||||
* @return the given values ordinal if found or otherwise
|
||||
* <code>(-(ord)-1)</code>, defined as the ordinal of the first
|
||||
|
@ -402,6 +263,37 @@ public abstract class IndexDocValues implements Closeable {
|
|||
* that the return value will always be >= 0 if the given value
|
||||
* is found.
|
||||
*/
|
||||
public abstract int getByValue(BytesRef value, BytesRef tmpRef);
|
||||
public int getByValue(BytesRef value, BytesRef spare) {
|
||||
return binarySearch(value, spare, 0, getValueCount() - 1);
|
||||
}
|
||||
|
||||
protected int binarySearch(BytesRef b, BytesRef bytesRef, int low,
|
||||
int high) {
|
||||
int mid = 0;
|
||||
while (low <= high) {
|
||||
mid = (low + high) >>> 1;
|
||||
getByOrd(mid, bytesRef);
|
||||
final int cmp = comparator.compare(bytesRef, b);
|
||||
if (cmp < 0) {
|
||||
low = mid + 1;
|
||||
} else if (cmp > 0) {
|
||||
high = mid - 1;
|
||||
} else {
|
||||
return mid;
|
||||
}
|
||||
}
|
||||
assert comparator.compare(bytesRef, b) != 0;
|
||||
return -(low + 1);
|
||||
}
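
The return convention mirrors java.util.Arrays.binarySearch: a non-negative result is the ord of the value, a negative result encodes where it would sort. A small sketch, not from the patch, of how a caller might interpret it:

import org.apache.lucene.index.values.IndexDocValues.SortedSource;
import org.apache.lucene.util.BytesRef;

// Sketch: resolve a value to its dense ord, or report where it would sort.
class SortedLookupExample {
  static void lookup(SortedSource sorted, BytesRef key) {
    final int ord = sorted.getByValue(key, new BytesRef());
    if (ord >= 0) {
      System.out.println("found, ord=" + ord);
    } else {
      // -ord - 1 is the ord of the first value greater than key
      System.out.println("missing, next greater ord=" + (-ord - 1));
    }
  }
}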
|
||||
|
||||
@Override
|
||||
public SortedSource asSortedSource() {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of unique values in this sorted source
|
||||
*/
|
||||
public abstract int getValueCount();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
package org.apache.lucene.index.values;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.values.FixedStraightBytesImpl.FixedStraightBytesEnum;
|
||||
import org.apache.lucene.index.values.IndexDocValues.Source;
|
||||
import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
|
@ -32,84 +32,43 @@ import org.apache.lucene.util.RamUsageEstimator;
|
|||
*/
|
||||
abstract class IndexDocValuesArray extends Source {
|
||||
|
||||
static final Map<ValueType, IndexDocValuesArray> TEMPLATES;
|
||||
|
||||
static {
|
||||
EnumMap<ValueType, IndexDocValuesArray> templates = new EnumMap<ValueType, IndexDocValuesArray>(
|
||||
ValueType.class);
|
||||
templates.put(ValueType.FIXED_INTS_16, new ShortValues());
|
||||
templates.put(ValueType.FIXED_INTS_32, new IntValues());
|
||||
templates.put(ValueType.FIXED_INTS_64, new LongValues());
|
||||
templates.put(ValueType.FIXED_INTS_8, new ByteValues());
|
||||
templates.put(ValueType.FLOAT_32, new FloatValues());
|
||||
templates.put(ValueType.FLOAT_64, new DoubleValues());
|
||||
TEMPLATES = Collections.unmodifiableMap(templates);
|
||||
}
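
The writers and readers below look up their per-type prototype in this map instead of switching on the ValueType everywhere. A standalone sketch of the same enum-keyed template pattern, with purely illustrative types:

import java.util.Collections;
import java.util.EnumMap;
import java.util.Map;

// Standalone sketch of the enum-keyed template pattern (illustrative enum and values).
public class TemplateLookupExample {
  enum Width { NARROW, WIDE }

  static final Map<Width, String> TEMPLATES;
  static {
    EnumMap<Width, String> templates = new EnumMap<Width, String>(Width.class);
    templates.put(Width.NARROW, "2 bytes per value");
    templates.put(Width.WIDE, "8 bytes per value");
    TEMPLATES = Collections.unmodifiableMap(templates);
  }

  public static void main(String[] args) {
    System.out.println(TEMPLATES.get(Width.WIDE));   // 8 bytes per value
  }
}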
|
||||
|
||||
protected final int bytesPerValue;
|
||||
private final ValueType type;
|
||||
private final boolean isFloat;
|
||||
protected int maxDocID = -1;
|
||||
|
||||
IndexDocValuesArray(int bytesPerValue, ValueType type) {
|
||||
super(type);
|
||||
this.bytesPerValue = bytesPerValue;
|
||||
this.type = type;
|
||||
switch (type) {
|
||||
case FIXED_INTS_16:
|
||||
case FIXED_INTS_32:
|
||||
case FIXED_INTS_64:
|
||||
case FIXED_INTS_8:
|
||||
isFloat = false;
|
||||
break;
|
||||
case FLOAT_32:
|
||||
case FLOAT_64:
|
||||
isFloat = true;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("illegal type: " + type);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public abstract IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
|
||||
throws IOException;
|
||||
|
||||
@Override
|
||||
public final int getValueCount() {
|
||||
return maxDocID + 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final ValueType type() {
|
||||
return type;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
|
||||
if (isFloat) {
|
||||
return new SourceEnum(attrSource, type(), this, maxDocID + 1) {
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= numDocs) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
floatsRef.floats[intsRef.offset] = IndexDocValuesArray.this
|
||||
.getFloat(target);
|
||||
return pos = target;
|
||||
}
|
||||
};
|
||||
} else {
|
||||
return new SourceEnum(attrSource, type(), this, maxDocID + 1) {
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= numDocs) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
intsRef.ints[intsRef.offset] = IndexDocValuesArray.this
|
||||
.getInt(target);
|
||||
return pos = target;
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
abstract ValuesEnum getDirectEnum(AttributeSource attrSource,
|
||||
IndexInput input, int maxDoc) throws IOException;
|
||||
|
||||
@Override
|
||||
public final boolean hasArray() {
|
||||
return true;
|
||||
}
|
||||
|
||||
void toBytes(long value, BytesRef bytesRef) {
|
||||
bytesRef.copy(value);
|
||||
}
|
||||
|
||||
void toBytes(double value, BytesRef bytesRef) {
|
||||
bytesRef.copy(Double.doubleToRawLongBits(value));
|
||||
}
|
||||
|
||||
final static class ByteValues extends IndexDocValuesArray {
|
||||
private final byte[] values;
|
||||
|
||||
|
@ -122,7 +81,6 @@ abstract class IndexDocValuesArray extends Source {
|
|||
super(1, ValueType.FIXED_INTS_8);
|
||||
values = new byte[numDocs];
|
||||
input.readBytes(values, 0, values.length, false);
|
||||
maxDocID = numDocs - 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -136,25 +94,16 @@ abstract class IndexDocValuesArray extends Source {
|
|||
return values[docID];
|
||||
}
|
||||
|
||||
@Override
|
||||
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
|
||||
int maxDoc) throws IOException {
|
||||
return new FixedIntsEnum(attrSource, input, type(),
|
||||
bytesPerValue, maxDoc) {
|
||||
|
||||
@Override
|
||||
protected final long toLong(BytesRef bytesRef) {
|
||||
return bytesRef.bytes[bytesRef.offset];
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
|
||||
throws IOException {
|
||||
return new ByteValues(input, numDocs);
|
||||
}
|
||||
|
||||
void toBytes(long value, BytesRef bytesRef) {
|
||||
bytesRef.bytes[0] = (byte) (0xFFL & value);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
final static class ShortValues extends IndexDocValuesArray {
|
||||
|
@ -171,7 +120,6 @@ abstract class IndexDocValuesArray extends Source {
|
|||
for (int i = 0; i < values.length; i++) {
|
||||
values[i] = input.readShort();
|
||||
}
|
||||
maxDocID = numDocs - 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -185,25 +133,16 @@ abstract class IndexDocValuesArray extends Source {
|
|||
return values[docID];
|
||||
}
|
||||
|
||||
@Override
|
||||
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
|
||||
int maxDoc) throws IOException {
|
||||
return new FixedIntsEnum(attrSource, input, type(),
|
||||
bytesPerValue, maxDoc) {
|
||||
|
||||
@Override
|
||||
protected final long toLong(BytesRef bytesRef) {
|
||||
return bytesRef.asShort();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
|
||||
throws IOException {
|
||||
return new ShortValues(input, numDocs);
|
||||
}
|
||||
|
||||
void toBytes(long value, BytesRef bytesRef) {
|
||||
bytesRef.copy((short) (0xFFFFL & value));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
final static class IntValues extends IndexDocValuesArray {
|
||||
|
@ -220,7 +159,6 @@ abstract class IndexDocValuesArray extends Source {
|
|||
for (int i = 0; i < values.length; i++) {
|
||||
values[i] = input.readInt();
|
||||
}
|
||||
maxDocID = numDocs - 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -234,24 +172,16 @@ abstract class IndexDocValuesArray extends Source {
|
|||
return 0xFFFFFFFF & values[docID];
|
||||
}
|
||||
|
||||
@Override
|
||||
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
|
||||
int maxDoc) throws IOException {
|
||||
return new FixedIntsEnum(attrSource, input, type(),
|
||||
bytesPerValue, maxDoc) {
|
||||
@Override
|
||||
protected final long toLong(BytesRef bytesRef) {
|
||||
return bytesRef.asInt();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
|
||||
throws IOException {
|
||||
return new IntValues(input, numDocs);
|
||||
}
|
||||
|
||||
void toBytes(long value, BytesRef bytesRef) {
|
||||
bytesRef.copy((int) (0xFFFFFFFF & value));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
final static class LongValues extends IndexDocValuesArray {
|
||||
|
@ -268,7 +198,6 @@ abstract class IndexDocValuesArray extends Source {
|
|||
for (int i = 0; i < values.length; i++) {
|
||||
values[i] = input.readLong();
|
||||
}
|
||||
maxDocID = numDocs - 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -282,18 +211,6 @@ abstract class IndexDocValuesArray extends Source {
|
|||
return values[docID];
|
||||
}
|
||||
|
||||
@Override
|
||||
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
|
||||
int maxDoc) throws IOException {
|
||||
return new FixedIntsEnum(attrSource, input, type(),
|
||||
bytesPerValue, maxDoc) {
|
||||
@Override
|
||||
protected final long toLong(BytesRef bytesRef) {
|
||||
return bytesRef.asLong();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
|
||||
throws IOException {
|
||||
|
@ -313,13 +230,13 @@ abstract class IndexDocValuesArray extends Source {
|
|||
private FloatValues(IndexInput input, int numDocs) throws IOException {
|
||||
super(RamUsageEstimator.NUM_BYTES_FLOAT, ValueType.FLOAT_32);
|
||||
values = new float[numDocs];
|
||||
/* we always read BIG_ENDIAN here since the writer serialized plain bytes
|
||||
* we can simply read the ints / longs
|
||||
* back in using readInt / readLong */
|
||||
/*
|
||||
* we always read BIG_ENDIAN here since the writer serialized plain bytes
|
||||
* we can simply read the ints / longs back in using readInt / readLong
|
||||
*/
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
values[i] = Float.intBitsToFloat(input.readInt());
|
||||
}
|
||||
maxDocID = numDocs - 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -332,17 +249,11 @@ abstract class IndexDocValuesArray extends Source {
|
|||
assert docID >= 0 && docID < values.length;
|
||||
return values[docID];
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
|
||||
int maxDoc) throws IOException {
|
||||
return new FloatsEnum(attrSource, input, type(),
|
||||
bytesPerValue, maxDoc) {
|
||||
@Override
|
||||
protected double toDouble(BytesRef bytesRef) {
|
||||
return Float.intBitsToFloat(bytesRef.asInt());
|
||||
}
|
||||
};
|
||||
void toBytes(double value, BytesRef bytesRef) {
|
||||
bytesRef.copy(Float.floatToRawIntBits((float)value));
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -351,7 +262,7 @@ abstract class IndexDocValuesArray extends Source {
|
|||
return new FloatValues(input, numDocs);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
final static class DoubleValues extends IndexDocValuesArray {
|
||||
private final double[] values;
|
||||
|
||||
|
@ -363,13 +274,13 @@ abstract class IndexDocValuesArray extends Source {
|
|||
private DoubleValues(IndexInput input, int numDocs) throws IOException {
|
||||
super(RamUsageEstimator.NUM_BYTES_DOUBLE, ValueType.FLOAT_64);
|
||||
values = new double[numDocs];
|
||||
/* we always read BIG_ENDIAN here since the writer serialized plain bytes
|
||||
* we can simply read the ints / longs
|
||||
* back in using readInt / readLong */
|
||||
/*
|
||||
* we always read BIG_ENDIAN here since the writer serialized plain bytes
|
||||
* we can simply read the ints / longs back in using readInt / readLong
|
||||
*/
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
values[i] = Double.longBitsToDouble(input.readLong());
|
||||
}
|
||||
maxDocID = numDocs - 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -383,79 +294,12 @@ abstract class IndexDocValuesArray extends Source {
|
|||
return values[docID];
|
||||
}
|
||||
|
||||
@Override
|
||||
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
|
||||
int maxDoc) throws IOException {
|
||||
return new FloatsEnum(attrSource, input, type(),
|
||||
bytesPerValue, maxDoc) {
|
||||
@Override
|
||||
protected double toDouble(BytesRef bytesRef) {
|
||||
return Double.longBitsToDouble(bytesRef.asLong());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
|
||||
throws IOException {
|
||||
return new DoubleValues(input, numDocs);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
private abstract static class FixedIntsEnum extends
|
||||
FixedStraightBytesEnum {
|
||||
private final ValueType type;
|
||||
|
||||
private FixedIntsEnum(AttributeSource source, IndexInput dataIn,
|
||||
ValueType type, int bytesPerValue, int maxDoc) throws IOException {
|
||||
super(source, dataIn, bytesPerValue, maxDoc);
|
||||
this.type = type;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
final int advance = super.advance(target);
|
||||
if (advance != NO_MORE_DOCS) {
|
||||
intsRef.ints[0] = toLong(this.bytesRef);
|
||||
}
|
||||
return advance;
|
||||
}
|
||||
|
||||
protected abstract long toLong(BytesRef bytesRef);
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return type;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private abstract static class FloatsEnum extends FixedStraightBytesEnum {
|
||||
|
||||
private final ValueType type;
|
||||
FloatsEnum(AttributeSource source, IndexInput dataIn, ValueType type, int bytePerValue, int maxDoc)
|
||||
throws IOException {
|
||||
super(source, dataIn, bytePerValue, maxDoc);
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
final int retVal = super.advance(target);
|
||||
if (retVal != NO_MORE_DOCS) {
|
||||
floatsRef.floats[floatsRef.offset] = toDouble(bytesRef);
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
protected abstract double toDouble(BytesRef bytesRef);
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return type;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -19,14 +19,9 @@ package org.apache.lucene.index.values;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.values.IndexDocValuesArray.ByteValues;
|
||||
import org.apache.lucene.index.values.IndexDocValuesArray.IntValues;
|
||||
import org.apache.lucene.index.values.IndexDocValuesArray.LongValues;
|
||||
import org.apache.lucene.index.values.IndexDocValuesArray.ShortValues;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
@ -37,10 +32,13 @@ import org.apache.lucene.util.IOUtils;
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public final class Ints {
|
||||
protected static final String CODEC_NAME = "Ints";
|
||||
protected static final int VERSION_START = 0;
|
||||
protected static final int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
private Ints() {
|
||||
}
|
||||
|
||||
|
||||
public static Writer getWriter(Directory dir, String id, Counter bytesUsed,
|
||||
ValueType type, IOContext context) throws IOException {
|
||||
return type == ValueType.VAR_INTS ? new PackedIntValues.PackedIntsWriter(dir, id,
|
||||
|
@ -50,15 +48,42 @@ public final class Ints {
|
|||
public static IndexDocValues getValues(Directory dir, String id, int numDocs,
|
||||
ValueType type, IOContext context) throws IOException {
|
||||
return type == ValueType.VAR_INTS ? new PackedIntValues.PackedIntsReader(dir, id,
|
||||
numDocs, context) : new IntsReader(dir, id, numDocs, context);
|
||||
numDocs, context) : new IntsReader(dir, id, numDocs, context, type);
|
||||
}
|
||||
|
||||
private static ValueType sizeToType(int size) {
|
||||
switch (size) {
|
||||
case 1:
|
||||
return ValueType.FIXED_INTS_8;
|
||||
case 2:
|
||||
return ValueType.FIXED_INTS_16;
|
||||
case 4:
|
||||
return ValueType.FIXED_INTS_32;
|
||||
case 8:
|
||||
return ValueType.FIXED_INTS_64;
|
||||
default:
|
||||
throw new IllegalStateException("illegal size " + size);
|
||||
}
|
||||
}
|
||||
|
||||
private static int typeToSize(ValueType type) {
|
||||
switch (type) {
|
||||
case FIXED_INTS_16:
|
||||
return 2;
|
||||
case FIXED_INTS_32:
|
||||
return 4;
|
||||
case FIXED_INTS_64:
|
||||
return 8;
|
||||
case FIXED_INTS_8:
|
||||
return 1;
|
||||
default:
|
||||
throw new IllegalStateException("illegal type " + type);
|
||||
}
|
||||
}
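
These widths determine how IntsWriter.add narrows each long before it is written: the value is masked to the declared number of bytes, so anything wider silently wraps. A standalone sketch of that masking with illustrative numbers:

// Standalone sketch of the narrowing applied for the fixed int widths (illustrative values).
public class FixedIntWidthsExample {
  public static void main(String[] args) {
    long v = 70000L;                           // 0x11170
    byte  as8  = (byte)  (0xFFL & v);          // FIXED_INTS_8  : 1 byte  -> 112
    short as16 = (short) (0xFFFFL & v);        // FIXED_INTS_16 : 2 bytes -> 4464
    int   as32 = (int)   (0xFFFFFFFFL & v);    // FIXED_INTS_32 : 4 bytes -> 70000
    System.out.println(as8 + " " + as16 + " " + as32 + " " + v);
  }
}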
|
||||
|
||||
static class IntsWriter extends FixedStraightBytesImpl.Writer {
|
||||
protected static final String CODEC_NAME = "Ints";
|
||||
protected static final int VERSION_START = 0;
|
||||
protected static final int VERSION_CURRENT = VERSION_START;
|
||||
|
||||
private final ValueType valueType;
|
||||
static class IntsWriter extends FixedStraightBytesImpl.Writer {
|
||||
private final IndexDocValuesArray template;
|
||||
|
||||
public IntsWriter(Directory dir, String id, Counter bytesUsed,
|
||||
IOContext context, ValueType valueType) throws IOException {
|
||||
|
@ -68,46 +93,15 @@ public final class Ints {
|
|||
protected IntsWriter(Directory dir, String id, String codecName,
|
||||
int version, Counter bytesUsed, IOContext context, ValueType valueType) throws IOException {
|
||||
super(dir, id, codecName, version, bytesUsed, context);
|
||||
this.valueType = valueType;
|
||||
final int expectedSize = getSize(valueType);
|
||||
final int expectedSize = typeToSize(valueType);
|
||||
this.bytesRef = new BytesRef(expectedSize);
|
||||
bytesRef.length = expectedSize;
|
||||
template = IndexDocValuesArray.TEMPLATES.get(valueType);
|
||||
}
|
||||
|
||||
private static int getSize(ValueType type) {
|
||||
switch (type) {
|
||||
case FIXED_INTS_16:
|
||||
return 2;
|
||||
case FIXED_INTS_32:
|
||||
return 4;
|
||||
case FIXED_INTS_64:
|
||||
return 8;
|
||||
case FIXED_INTS_8:
|
||||
return 1;
|
||||
default:
|
||||
throw new IllegalStateException("illegal type " + type);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(int docID, long v) throws IOException {
|
||||
switch (valueType) {
|
||||
case FIXED_INTS_64:
|
||||
bytesRef.copy(v);
|
||||
break;
|
||||
case FIXED_INTS_32:
|
||||
bytesRef.copy((int) (0xFFFFFFFF & v));
|
||||
break;
|
||||
case FIXED_INTS_16:
|
||||
bytesRef.copy((short) (0xFFFFL & v));
|
||||
break;
|
||||
case FIXED_INTS_8:
|
||||
bytesRef.bytes[0] = (byte) (0xFFL & v);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("illegal type " + valueType);
|
||||
}
|
||||
|
||||
template.toBytes(v, bytesRef);
|
||||
add(docID, bytesRef);
|
||||
}
|
||||
|
||||
|
@ -116,72 +110,27 @@ public final class Ints {
|
|||
add(docID, docValues.getInt());
|
||||
}
|
||||
}
|
||||
|
||||
final static class IntsReader extends FixedStraightBytesImpl.Reader {
|
||||
private final ValueType type;
|
||||
|
||||
final static class IntsReader extends FixedStraightBytesImpl.FixedStraightReader {
|
||||
private final IndexDocValuesArray arrayTemplate;
|
||||
|
||||
IntsReader(Directory dir, String id, int maxDoc, IOContext context)
|
||||
IntsReader(Directory dir, String id, int maxDoc, IOContext context, ValueType type)
|
||||
throws IOException {
|
||||
super(dir, id, IntsWriter.CODEC_NAME, IntsWriter.VERSION_CURRENT, maxDoc,
|
||||
context);
|
||||
switch (size) {
|
||||
case 8:
|
||||
type = ValueType.FIXED_INTS_64;
|
||||
arrayTemplate = new LongValues();
|
||||
break;
|
||||
case 4:
|
||||
type = ValueType.FIXED_INTS_32;
|
||||
arrayTemplate = new IntValues();
|
||||
break;
|
||||
case 2:
|
||||
type = ValueType.FIXED_INTS_16;
|
||||
arrayTemplate = new ShortValues();
|
||||
break;
|
||||
case 1:
|
||||
type = ValueType.FIXED_INTS_8;
|
||||
arrayTemplate = new ByteValues();
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("illegal size: " + size);
|
||||
}
|
||||
super(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc,
|
||||
context, type);
|
||||
arrayTemplate = IndexDocValuesArray.TEMPLATES.get(type);
|
||||
assert arrayTemplate != null;
|
||||
assert type == sizeToType(size);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Source load() throws IOException {
|
||||
boolean success = false;
|
||||
IndexInput input = null;
|
||||
final IndexInput indexInput = cloneData();
|
||||
try {
|
||||
input = cloneData();
|
||||
final Source source = arrayTemplate.newFromInput(input, maxDoc);
|
||||
success = true;
|
||||
return source;
|
||||
return arrayTemplate.newFromInput(indexInput, maxDoc);
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(input, datIn);
|
||||
}
|
||||
IOUtils.close(indexInput);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource source) throws IOException {
|
||||
final IndexInput input = cloneData();
|
||||
boolean success = false;
|
||||
try {
|
||||
final ValuesEnum valuesEnum = arrayTemplate.getDirectEnum(source,
|
||||
input, maxDoc);
|
||||
success = true;
|
||||
return valuesEnum;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(input);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return type;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.index.values;
|
|||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.ReaderUtil;
|
||||
|
||||
|
@ -28,6 +27,7 @@ import org.apache.lucene.util.ReaderUtil;
|
|||
* {@link IndexDocValues}
|
||||
*
|
||||
* @lucene.experimental
|
||||
* @lucene.internal
|
||||
*/
|
||||
public class MultiIndexDocValues extends IndexDocValues {
|
||||
|
||||
|
@ -56,14 +56,9 @@ public class MultiIndexDocValues extends IndexDocValues {
|
|||
reset(docValuesIdx);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource source) throws IOException {
|
||||
return new MultiValuesEnum(docValuesIdx, starts);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Source load() throws IOException {
|
||||
return new MultiSource(docValuesIdx, starts);
|
||||
return new MultiSource(docValuesIdx, starts, false);
|
||||
}
|
||||
|
||||
public IndexDocValues reset(DocValuesIndex[] docValuesIdx) {
|
||||
|
@ -85,11 +80,6 @@ public class MultiIndexDocValues extends IndexDocValues {
|
|||
this.emptySoruce = new EmptySource(type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
|
||||
return emptySoruce.getEnum(attrSource);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Source load() throws IOException {
|
||||
return emptySoruce;
|
||||
|
@ -99,69 +89,11 @@ public class MultiIndexDocValues extends IndexDocValues {
|
|||
public ValueType type() {
|
||||
return emptySoruce.type();
|
||||
}
|
||||
}
|
||||
|
||||
private static class MultiValuesEnum extends ValuesEnum {
|
||||
private DocValuesIndex[] docValuesIdx;
|
||||
private final int maxDoc;
|
||||
private int currentStart;
|
||||
private int currentMax;
|
||||
private int currentDoc = -1;
|
||||
private ValuesEnum currentEnum;
|
||||
private final int[] starts;
|
||||
|
||||
public MultiValuesEnum(DocValuesIndex[] docValuesIdx, int[] starts)
|
||||
throws IOException {
|
||||
super(docValuesIdx[0].docValues.type());
|
||||
this.docValuesIdx = docValuesIdx;
|
||||
final DocValuesIndex last = docValuesIdx[docValuesIdx.length - 1];
|
||||
maxDoc = last.start + last.length;
|
||||
final DocValuesIndex idx = docValuesIdx[0];
|
||||
currentEnum = idx.docValues.getEnum(this.attributes());
|
||||
currentEnum.copyFrom(this);
|
||||
intsRef = currentEnum.intsRef;
|
||||
currentMax = idx.length;
|
||||
currentStart = 0;
|
||||
this.starts = starts;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
currentEnum.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
assert target > currentDoc : "target " + target
|
||||
+ " must be > than the current doc " + currentDoc;
|
||||
int relativeDoc = target - currentStart;
|
||||
do {
|
||||
if (target >= maxDoc) {// we are beyond max doc
|
||||
return currentDoc = NO_MORE_DOCS;
|
||||
}
|
||||
if (target >= currentMax) {
|
||||
final int idx = ReaderUtil.subIndex(target, starts);
|
||||
currentEnum.close();
|
||||
currentEnum = docValuesIdx[idx].docValues.getEnum();
|
||||
currentEnum.copyFrom(this);
|
||||
currentStart = docValuesIdx[idx].start;
|
||||
currentMax = currentStart + docValuesIdx[idx].length;
|
||||
relativeDoc = target - currentStart;
|
||||
}
|
||||
target = currentMax; // make sure that we advance to the next enum if the current is exhausted
|
||||
|
||||
} while ((relativeDoc = currentEnum.advance(relativeDoc)) == NO_MORE_DOCS);
|
||||
return currentDoc = currentStart + relativeDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return currentDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(currentDoc + 1);
|
||||
public Source getDirectSource() throws IOException {
|
||||
return emptySoruce;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -171,12 +103,14 @@ public class MultiIndexDocValues extends IndexDocValues {
|
|||
private Source current;
|
||||
private final int[] starts;
|
||||
private final DocValuesIndex[] docValuesIdx;
|
||||
private boolean direct;
|
||||
|
||||
public MultiSource(DocValuesIndex[] docValuesIdx, int[] starts) {
|
||||
public MultiSource(DocValuesIndex[] docValuesIdx, int[] starts, boolean direct) {
|
||||
super(docValuesIdx[0].docValues.type());
|
||||
this.docValuesIdx = docValuesIdx;
|
||||
this.starts = starts;
|
||||
assert docValuesIdx.length != 0;
|
||||
|
||||
this.direct = direct;
|
||||
}
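
MultiSource stitches the per-slice sources together: ensureSource below locates the slice that owns a top-level docID via ReaderUtil.subIndex and the starts offsets, and the lookup then uses a slice-relative docID. A standalone sketch of that mapping with made-up offsets:

// Standalone sketch: find the slice owning a top-level docID and make the id slice-relative.
public class SliceMappingExample {
  public static void main(String[] args) {
    int[] starts = { 0, 100, 250 };            // per-slice document bases (illustrative)
    int docID = 120;
    int idx = 0;
    while (idx + 1 < starts.length && starts[idx + 1] <= docID) {
      idx++;                                   // last slice whose start is <= docID
    }
    int relativeDoc = docID - starts[idx];
    System.out.println("slice=" + idx + " relativeDoc=" + relativeDoc);  // slice=1 relativeDoc=20
  }
}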
|
||||
|
||||
public long getInt(int docID) {
|
||||
|
@ -193,7 +127,11 @@ public class MultiIndexDocValues extends IndexDocValues {
|
|||
+ " for doc id: " + docID + " slices : " + Arrays.toString(starts);
|
||||
assert docValuesIdx[idx] != null;
|
||||
try {
|
||||
current = docValuesIdx[idx].docValues.getSource();
|
||||
if (direct) {
|
||||
current = docValuesIdx[idx].docValues.getDirectSource();
|
||||
} else {
|
||||
current = docValuesIdx[idx].docValues.getSource();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("load failed", e); // TODO how should we
|
||||
// handle this
|
||||
|
@ -214,24 +152,12 @@ public class MultiIndexDocValues extends IndexDocValues {
|
|||
final int doc = ensureSource(docID);
|
||||
return current.getBytes(doc, bytesRef);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
|
||||
throw new UnsupportedOperationException(); // TODO
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return docValuesIdx[0].docValues.type();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static class EmptySource extends Source {
|
||||
private final ValueType type;
|
||||
|
||||
public EmptySource(ValueType type) {
|
||||
this.type = type;
|
||||
super(type);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -250,20 +176,15 @@ public class MultiIndexDocValues extends IndexDocValues {
|
|||
public long getInt(int docID) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
|
||||
return ValuesEnum.emptyEnum(type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return type;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return this.docValuesIdx[0].docValues.type();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Source getDirectSource() throws IOException {
|
||||
return new MultiSource(docValuesIdx, starts, true);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,18 +21,15 @@ import java.io.IOException;
|
|||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.values.FixedStraightBytesImpl.FixedBytesWriterBase;
|
||||
import org.apache.lucene.index.values.IndexDocValues.Source;
|
||||
import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
|
||||
import org.apache.lucene.index.values.IndexDocValuesArray.LongValues;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LongsRef;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
/**
|
||||
|
@ -51,7 +48,6 @@ class PackedIntValues {
|
|||
|
||||
static class PackedIntsWriter extends FixedBytesWriterBase {
|
||||
|
||||
private LongsRef intsRef;
|
||||
private long minValue;
|
||||
private long maxValue;
|
||||
private boolean started;
|
||||
|
@ -114,10 +110,10 @@ class PackedIntValues {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected void mergeDoc(int docID) throws IOException {
|
||||
protected void mergeDoc(int docID, int sourceDoc) throws IOException {
|
||||
assert docID > lastDocId : "docID: " + docID
|
||||
+ " must be greater than the last added doc id: " + lastDocId;
|
||||
add(docID, intsRef.get());
|
||||
add(docID, currentMergeSource.getInt(sourceDoc));
|
||||
}
|
||||
|
||||
private void writePackedInts(IndexOutput datOut, int docCount) throws IOException {
|
||||
|
@ -139,12 +135,6 @@ class PackedIntValues {
|
|||
w.add(defaultValue);
|
||||
}
|
||||
w.finish();
|
||||
w.finish();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setNextEnum(ValuesEnum valuesEnum) {
|
||||
intsRef = valuesEnum.getInt();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -215,30 +205,17 @@ class PackedIntValues {
|
|||
datIn.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource source) throws IOException {
|
||||
final IndexInput input = (IndexInput) datIn.clone();
|
||||
boolean success = false;
|
||||
try {
|
||||
final ValuesEnum inst;
|
||||
if (values == null) {
|
||||
inst = new PackedIntsEnumImpl(source, input);
|
||||
} else {
|
||||
inst = values.getDirectEnum(source, input, numDocs);
|
||||
}
|
||||
success = true;
|
||||
return inst;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(input);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.VAR_INTS;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Source getDirectSource() throws IOException {
|
||||
return values != null ? new FixedStraightBytesImpl.DirectFixedStraightSource((IndexInput) datIn.clone(), 8, ValueType.FIXED_INTS_64) : new DirectPackedIntsSource((IndexInput) datIn.clone());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -248,7 +225,7 @@ class PackedIntValues {
|
|||
private final PackedInts.Reader values;
|
||||
|
||||
public PackedIntsSource(IndexInput dataIn) throws IOException {
|
||||
|
||||
super(ValueType.VAR_INTS);
|
||||
minValue = dataIn.readLong();
|
||||
defaultValue = dataIn.readLong();
|
||||
values = PackedInts.getReader(dataIn);
|
||||
|
@ -263,72 +240,41 @@ class PackedIntValues {
|
|||
final long value = values.get(docID);
|
||||
return value == defaultValue ? 0 : minValue + value;
|
||||
}
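
As a reading aid, not part of the patch: each packed slot stores an offset from minValue, and defaultValue marks documents that never received a value, which decode to 0. A standalone sketch with made-up header values:

// Standalone sketch of the delta decoding above (minValue and defaultValue are illustrative).
public class PackedDeltaDecodeExample {
  public static void main(String[] args) {
    long minValue = 100L;
    long defaultValue = 42L;                   // sentinel slot value for "no value set"
    long[] packedSlots = { 0L, 7L, 42L };      // raw values as read from the packed reader
    for (long value : packedSlots) {
      long decoded = value == defaultValue ? 0 : minValue + value;
      System.out.println(decoded);             // prints 100, 107, 0
    }
  }
}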
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
|
||||
return new SourceEnum(attrSource, type(), this, values.size()) {
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= numDocs)
|
||||
return pos = NO_MORE_DOCS;
|
||||
intsRef.ints[intsRef.offset] = source.getInt(target);
|
||||
return pos = target;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.VAR_INTS;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class PackedIntsEnumImpl extends ValuesEnum {
|
||||
private final PackedInts.ReaderIterator ints;
|
||||
private static final class DirectPackedIntsSource extends Source {
|
||||
private final PackedInts.RandomAccessReaderIterator ints;
|
||||
private long minValue;
|
||||
private final IndexInput dataIn;
|
||||
private final long defaultValue;
|
||||
private final int maxDoc;
|
||||
private int pos = -1;
|
||||
|
||||
private PackedIntsEnumImpl(AttributeSource source, IndexInput dataIn)
|
||||
private DirectPackedIntsSource(IndexInput dataIn)
|
||||
throws IOException {
|
||||
super(source, ValueType.VAR_INTS);
|
||||
intsRef.offset = 0;
|
||||
this.dataIn = dataIn;
|
||||
super(ValueType.VAR_INTS);
|
||||
minValue = dataIn.readLong();
|
||||
defaultValue = dataIn.readLong();
|
||||
this.ints = PackedInts.getReaderIterator(dataIn);
|
||||
maxDoc = ints.size();
|
||||
this.ints = PackedInts.getRandomAccessReaderIterator(dataIn);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
ints.close();
|
||||
dataIn.close();
|
||||
public double getFloat(int docID) {
|
||||
return getInt(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= maxDoc) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
public BytesRef getBytes(int docID, BytesRef ref) {
|
||||
ref.grow(8);
|
||||
ref.copy(getInt(docID));
|
||||
return ref;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getInt(int docID) {
|
||||
try {
|
||||
final long val = ints.get(docID);
|
||||
return val == defaultValue ? 0 : minValue + val;
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
final long val = ints.advance(target);
|
||||
intsRef.ints[intsRef.offset] = val == defaultValue ? 0 : minValue + val;
|
||||
return pos = target;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (pos >= maxDoc) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
return advance(pos + 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -18,36 +18,29 @@ package org.apache.lucene.index.values;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.values.IndexDocValues.SortedSource;
|
||||
import org.apache.lucene.index.values.IndexDocValues.Source;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* Abstract base class for {@link IndexDocValues} {@link Source} /
|
||||
* {@link SortedSource} cache.
|
||||
* Abstract base class for {@link IndexDocValues} {@link Source} cache.
|
||||
* <p>
|
||||
* {@link Source} and {@link SortedSource} instances loaded via
|
||||
* {@link IndexDocValues#load()} and {@link IndexDocValues#loadSorted(Comparator)} are
|
||||
* entirely memory resident and need to be maintained by the caller. Each call
|
||||
* to {@link IndexDocValues#load()} or {@link IndexDocValues#loadSorted(Comparator)} will
|
||||
* cause an entire reload of the underlying data. Source and
|
||||
* {@link SortedSource} instances obtained from {@link IndexDocValues#getSource()}
|
||||
* and {@link IndexDocValues#getSource()} respectively are maintained by a
|
||||
* {@link SourceCache} that is closed ({@link #close(IndexDocValues)}) once the
|
||||
* {@link IndexReader} that created the {@link IndexDocValues} instance is closed.
|
||||
* {@link Source} instances loaded via {@link IndexDocValues#load()} are entirely memory resident
|
||||
* and need to be maintained by the caller. Each call to
|
||||
* {@link IndexDocValues#load()} will cause an entire reload of
|
||||
* the underlying data. Source instances obtained from
|
||||
* {@link IndexDocValues#getSource()} are maintained by a {@link SourceCache}
* that is closed (
|
||||
* {@link #close(IndexDocValues)}) once the {@link IndexReader} that created the
|
||||
* {@link IndexDocValues} instance is closed.
|
||||
* <p>
|
||||
* Unless {@link Source} and {@link SortedSource} instances are managed by
|
||||
* another entity it is recommended to use the cached variants to obtain a
|
||||
* source instance.
|
||||
* Unless {@link Source} instances are managed by another entity it is
|
||||
* recommended to use the cached variants to obtain a source instance.
|
||||
* <p>
|
||||
* Implementation of this API must be thread-safe.
|
||||
*
|
||||
* @see IndexDocValues#setCache(SourceCache)
|
||||
* @see IndexDocValues#getSource()
|
||||
* @see IndexDocValues#getSortedSorted(Comparator)
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
|
@ -63,17 +56,7 @@ public abstract class SourceCache {
|
|||
public abstract Source load(IndexDocValues values) throws IOException;
|
||||
|
||||
/**
|
||||
* Atomically loads a {@link SortedSource} into the cache from the given
|
||||
* {@link IndexDocValues} and returns it iff no other {@link SortedSource} has
|
||||
* already been cached. Otherwise the cached source is returned.
|
||||
* <p>
|
||||
* This method will not return <code>null</code>
|
||||
*/
|
||||
public abstract SortedSource loadSorted(IndexDocValues values,
|
||||
Comparator<BytesRef> comp) throws IOException;
|
||||
|
||||
/**
|
||||
* Atomically invalidates the cached {@link Source} and {@link SortedSource}
|
||||
* Atomically invalidates the cached {@link Source}
|
||||
* instances if any and empties the cache.
|
||||
*/
|
||||
public abstract void invalidate(IndexDocValues values);
|
||||
|
@ -87,14 +70,13 @@ public abstract class SourceCache {
|
|||
|
||||
/**
|
||||
* Simple per {@link IndexDocValues} instance cache implementation that holds a
|
||||
* {@link Source} and {@link SortedSource} reference as a member variable.
|
||||
* {@link Source} reference as a member variable.
|
||||
* <p>
|
||||
* If a {@link DirectSourceCache} instance is closed or invalidated the cached
|
||||
* reference is simply set to <code>null</code>
|
||||
*/
|
||||
public static final class DirectSourceCache extends SourceCache {
|
||||
private Source ref;
|
||||
private SortedSource sortedRef;
|
||||
|
||||
public synchronized Source load(IndexDocValues values) throws IOException {
|
||||
if (ref == null) {
|
||||
|
@ -103,17 +85,8 @@ public abstract class SourceCache {
|
|||
return ref;
|
||||
}
|
||||
|
||||
public synchronized SortedSource loadSorted(IndexDocValues values,
|
||||
Comparator<BytesRef> comp) throws IOException {
|
||||
if (sortedRef == null) {
|
||||
sortedRef = values.loadSorted(comp);
|
||||
}
|
||||
return sortedRef;
|
||||
}
|
||||
|
||||
public synchronized void invalidate(IndexDocValues values) {
|
||||
ref = null;
|
||||
sortedRef = null;
|
||||
}
|
||||
}
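For illustration only, a minimal sketch of a caller using both access paths described above; the class and method names of the sketch itself are assumptions, while getSource(), load() and getBytes(int, BytesRef) are the accessors this patch exposes:

import java.io.IOException;

import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.util.BytesRef;

// Sketch: prefer the cached accessor over an explicit load().
class CachedSourceSketch {
  BytesRef firstValue(IndexDocValues values) throws IOException {
    // getSource() is served by the SourceCache described above: the Source is
    // loaded at most once and released when the owning IndexReader is closed.
    Source cached = values.getSource();
    // load() bypasses the cache and re-reads the underlying data on every call;
    // the caller is then responsible for the returned instance.
    Source reloaded = values.load();
    BytesRef scratch = new BytesRef();
    cached.getBytes(0, scratch);   // random access by document id
    reloaded.getBytes(0, scratch);
    return scratch;
  }
}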
|
||||
|
||||
|
|
|
@ -43,9 +43,8 @@ public enum ValueType {
|
|||
* <p>
|
||||
* NOTE: this type uses <tt>0</tt> as the default value without any
|
||||
* distinction between provided <tt>0</tt> values during indexing. All
|
||||
* documents without an explicit value will use <tt>0</tt> instead. In turn,
|
||||
* {@link ValuesEnum} instances will not skip documents without an explicit
|
||||
* value assigned. Custom default values must be assigned explicitly.
|
||||
* documents without an explicit value will use <tt>0</tt> instead.
|
||||
* Custom default values must be assigned explicitly.
|
||||
* </p>
|
||||
*/
|
||||
VAR_INTS,
|
||||
|
@ -56,9 +55,8 @@ public enum ValueType {
|
|||
* <p>
|
||||
* NOTE: this type uses <tt>0</tt> as the default value without any
|
||||
* distinction between provided <tt>0</tt> values during indexing. All
|
||||
* documents without an explicit value will use <tt>0</tt> instead. In turn,
|
||||
* {@link ValuesEnum} instances will not skip documents without an explicit
|
||||
* value assigned. Custom default values must be assigned explicitly.
|
||||
* documents without an explicit value will use <tt>0</tt> instead.
|
||||
* Custom default values must be assigned explicitly.
|
||||
* </p>
|
||||
*/
|
||||
FIXED_INTS_8,
|
||||
|
@ -69,9 +67,8 @@ public enum ValueType {
|
|||
* <p>
|
||||
* NOTE: this type uses <tt>0</tt> as the default value without any
|
||||
* distinction between provided <tt>0</tt> values during indexing. All
|
||||
* documents without an explicit value will use <tt>0</tt> instead. In turn,
|
||||
* {@link ValuesEnum} instances will not skip documents without an explicit
|
||||
* value assigned. Custom default values must be assigned explicitly.
|
||||
* documents without an explicit value will use <tt>0</tt> instead.
|
||||
* Custom default values must be assigned explicitly.
|
||||
* </p>
|
||||
*/
|
||||
FIXED_INTS_16,
|
||||
|
@ -82,9 +79,8 @@ public enum ValueType {
|
|||
* <p>
|
||||
* NOTE: this type uses <tt>0</tt> as the default value without any
|
||||
* distinction between provided <tt>0</tt> values during indexing. All
|
||||
* documents without an explicit value will use <tt>0</tt> instead. In turn,
|
||||
* {@link ValuesEnum} instances will not skip documents without an explicit
|
||||
* value assigned. Custom default values must be assigned explicitly.
|
||||
* documents without an explicit value will use <tt>0</tt> instead.
|
||||
* Custom default values must be assigned explicitly.
|
||||
* </p>
|
||||
*/
|
||||
FIXED_INTS_32,
|
||||
|
@ -95,9 +91,8 @@ public enum ValueType {
|
|||
* <p>
|
||||
* NOTE: this type uses <tt>0</tt> as the default value without any
|
||||
* distinction between provided <tt>0</tt> values during indexing. All
|
||||
* documents without an explicit value will use <tt>0</tt> instead. In turn,
|
||||
* {@link ValuesEnum} instances will not skip documents without an explicit
|
||||
* value assigned. Custom default values must be assigned explicitly.
|
||||
* documents without an explicit value will use <tt>0</tt> instead.
|
||||
* Custom default values must be assigned explicitly.
|
||||
* </p>
|
||||
*/
|
||||
FIXED_INTS_64,
|
||||
|
@ -110,9 +105,8 @@ public enum ValueType {
|
|||
* <p>
|
||||
* NOTE: this type uses <tt>0.0f</tt> as the default value without any
|
||||
* distinction between provided <tt>0.0f</tt> values during indexing. All
|
||||
* documents without an explicit value will use <tt>0.0f</tt> instead. In
|
||||
* turn, {@link ValuesEnum} instances will not skip documents without an
|
||||
* explicit value assigned. Custom default values must be assigned explicitly.
|
||||
* documents without an explicit value will use <tt>0.0f</tt> instead.
|
||||
* Custom default values must be assigned explicitly.
|
||||
* </p>
|
||||
*/
|
||||
FLOAT_32,
|
||||
|
@ -126,9 +120,8 @@ public enum ValueType {
|
|||
* <p>
|
||||
* NOTE: this type uses <tt>0.0d</tt> as the default value without any
|
||||
* distinction between provided <tt>0.0d</tt> values during indexing. All
|
||||
* documents without an explicit value will use <tt>0.0d</tt> instead. In
|
||||
* turn, {@link ValuesEnum} instances will not skip documents without an
|
||||
* explicit value assigned. Custom default values must be assigned explicitly.
|
||||
* documents without an explicit value will use <tt>0.0d</tt> instead.
|
||||
* Custom default values must be assigned explicitly.
|
||||
* </p>
|
||||
*/
|
||||
FLOAT_64,
|
||||
|
@ -143,9 +136,7 @@ public enum ValueType {
|
|||
* NOTE: this type uses <tt>0 byte</tt> filled byte[] based on the length of the first seen
|
||||
* value as the default value without any distinction between explicitly
|
||||
* provided values during indexing. All documents without an explicit value
|
||||
* will use the default instead. In turn, {@link ValuesEnum} instances will
|
||||
* not skip documents without an explicit value assigned. Custom default
|
||||
* values must be assigned explicitly.
|
||||
* will use the default instead. Custom default values must be assigned explicitly.
|
||||
* </p>
|
||||
*/
|
||||
BYTES_FIXED_STRAIGHT,
|
||||
|
@ -159,33 +150,11 @@ public enum ValueType {
|
|||
* NOTE: Fields of this type will not store values for documents without an
|
||||
* explicitly provided value. If a document's value is accessed while no
|
||||
* explicit value is stored the returned {@link BytesRef} will be a 0-length
|
||||
* reference. In turn, {@link ValuesEnum} instances will skip over documents
|
||||
* without an explicit value assigned. Custom default values must be assigned
|
||||
* explicitly.
|
||||
* reference. Custom default values must be assigned explicitly.
|
||||
* </p>
|
||||
*/
|
||||
BYTES_FIXED_DEREF,
|
||||
|
||||
/**
|
||||
* A fixed length pre-sorted byte[] variant. Fields with this type only
|
||||
* store distinct byte values and store an additional offset pointer per
|
||||
* document to dereference the shared byte[]. The stored
|
||||
* byte[] is presorted, by default by unsigned byte order,
|
||||
* and allows access via document id, ordinal and by-value.
|
||||
* Use this type if your documents may share the same byte[].
|
||||
* <p>
|
||||
* NOTE: Fields of this type will not store values for documents without and
|
||||
* explicitly provided value. If a documents value is accessed while no
|
||||
* explicit value is stored the returned {@link BytesRef} will be a 0-length
|
||||
* reference. In turn, {@link ValuesEnum} instances will skip over documents
|
||||
* without an explicit value assigned. Custom default values must be assigned
|
||||
* explicitly.
|
||||
* </p>
|
||||
*
|
||||
* @see SortedSource
|
||||
*/
|
||||
BYTES_FIXED_SORTED,
|
||||
|
||||
/**
|
||||
* Variable length straight stored byte[] variant. All bytes are
|
||||
* stored sequentially for compactness. Usage of this type via the
|
||||
|
@ -195,9 +164,7 @@ public enum ValueType {
|
|||
* NOTE: Fields of this type will not store values for documents without an
|
||||
* explicitly provided value. If a document's value is accessed while no
|
||||
* explicit value is stored the returned {@link BytesRef} will be a 0-length
|
||||
* byte[] reference. In contrast to dereferenced variants, {@link ValuesEnum}
|
||||
* instances will <b>not</b> skip over documents without an explicit value
|
||||
* assigned. Custom default values must be assigned explicitly.
|
||||
* byte[] reference. Custom default values must be assigned explicitly.
|
||||
* </p>
|
||||
*/
|
||||
BYTES_VAR_STRAIGHT,
|
||||
|
@ -210,13 +177,12 @@ public enum ValueType {
|
|||
* NOTE: Fields of this type will not store values for documents without an
|
||||
* explicitly provided value. If a document's value is accessed while no
|
||||
* explicit value is stored the returned {@link BytesRef} will be a 0-length
|
||||
* reference. In turn, {@link ValuesEnum} instances will skip over documents
|
||||
* without an explicit value assigned. Custom default values must be assigned
|
||||
* explicitly.
|
||||
* reference. Custom default values must be assigned explicitly.
|
||||
* </p>
|
||||
*/
|
||||
BYTES_VAR_DEREF,
|
||||
|
||||
|
||||
/**
|
||||
* A variable length pre-sorted byte[] variant. Just like
|
||||
* {@link #BYTES_FIXED_SORTED}, but allowing each
|
||||
|
@ -225,12 +191,30 @@ public enum ValueType {
|
|||
* NOTE: Fields of this type will not store values for documents without an
|
||||
* explicitly provided value. If a document's value is accessed while no
|
||||
* explicit value is stored the returned {@link BytesRef} will be a 0-length
|
||||
* reference. In turn, {@link ValuesEnum} instances will skip over documents
|
||||
* without an explicit value assigned. Custom default values must be assigned
|
||||
* reference. Custom default values must be assigned explicitly.
|
||||
* </p>
|
||||
*
|
||||
* @see SortedSource
|
||||
*/
|
||||
BYTES_VAR_SORTED,
|
||||
|
||||
/**
|
||||
* A fixed length pre-sorted byte[] variant. Fields with this type only
|
||||
* store distinct byte values and store an additional offset pointer per
|
||||
* document to dereference the shared byte[]. The stored
|
||||
* byte[] is presorted, by default by unsigned byte order,
|
||||
* and allows access via document id, ordinal and by-value.
|
||||
* Use this type if your documents may share the same byte[].
|
||||
* <p>
|
||||
* NOTE: Fields of this type will not store values for documents without an
|
||||
* explicitly provided value. If a document's value is accessed while no
|
||||
* explicit value is stored the returned {@link BytesRef} will be a 0-length
|
||||
* reference. Custom default values must be assigned
|
||||
* explicitly.
|
||||
* </p>
|
||||
*
|
||||
* @see SortedSource
|
||||
*/
|
||||
BYTES_VAR_SORTED
|
||||
BYTES_FIXED_SORTED
|
||||
|
||||
}
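As a small, hedged illustration of the default-value behaviour these notes describe; the field name is an assumption, while IndexDocValuesField, its one-argument constructor and setInt(long) appear elsewhere in this patch:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.IndexDocValuesField;

// Sketch: a document that never sets the value still reads back as the type's
// default (0 for the integer types above); there is no missing-value marker.
class DefaultValueSketch {
  Document withPrice(long price) {
    Document doc = new Document();
    IndexDocValuesField dv = new IndexDocValuesField("price"); // field name is an assumption
    dv.setInt(price);                                          // record the per-document value
    doc.add(dv);
    return doc;
  }

  Document withoutPrice() {
    return new Document(); // readers will see 0 for this document's "price" values
  }
}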
|
||||
|
|
|
@ -1,156 +0,0 @@
|
|||
package org.apache.lucene.index.values;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FloatsRef;
|
||||
import org.apache.lucene.util.LongsRef;
|
||||
|
||||
/**
|
||||
* {@link ValuesEnum} is a {@link DocIdSetIterator} iterating <tt>byte[]</tt>
|
||||
* , <tt>long</tt> and <tt>double</tt> stored per document. Depending on the
|
||||
* enum's {@link ValueType} ({@link #type()}) the enum might skip over documents that
|
||||
* have no value stored. Types like {@link ValueType#BYTES_VAR_STRAIGHT} might not
|
||||
* skip over documents even if there is no value associated with a document. The
|
||||
* value for documents without values again depends on the type's implementation
|
||||
* although a reference for a {@link ValueType} returned from an accessor method
|
||||
* {@link #getFloat()}, {@link #getInt()} or {@link #bytes()} will never be
|
||||
* <code>null</code> even if a document has no value.
|
||||
* <p>
|
||||
* Note: Only the reference for the enum's type is initialized to non
|
||||
* <code>null</code>, i.e. {@link #getInt()} will always return <code>null</code>
|
||||
* if the enum's Type is {@link ValueType#FLOAT_32}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class ValuesEnum extends DocIdSetIterator {
|
||||
private AttributeSource source;
|
||||
private final ValueType enumType;
|
||||
protected BytesRef bytesRef = new BytesRef(1);
|
||||
protected FloatsRef floatsRef = new FloatsRef(1);
|
||||
protected LongsRef intsRef = new LongsRef(1);
|
||||
|
||||
/**
|
||||
* Creates a new {@link ValuesEnum} for the given type. The
|
||||
* {@link AttributeSource} for this enum is set to <code>null</code>
|
||||
*/
|
||||
protected ValuesEnum(ValueType enumType) {
|
||||
this(null, enumType);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new {@link ValuesEnum} for the given type.
|
||||
*/
|
||||
protected ValuesEnum(AttributeSource source, ValueType enumType) {
|
||||
this.source = source;
|
||||
this.enumType = enumType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the type of this enum
|
||||
*/
|
||||
public ValueType type() {
|
||||
return enumType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a {@link BytesRef} or <code>null</code> if this enum doesn't
|
||||
* enumerate byte[] values
|
||||
*/
|
||||
public BytesRef bytes() {
|
||||
return bytesRef;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a {@link FloatsRef} or <code>null</code> if this enum doesn't
|
||||
* enumerate floating point values
|
||||
*/
|
||||
public FloatsRef getFloat() {
|
||||
return floatsRef;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a {@link LongsRef} or <code>null</code> if this enum doesn't
|
||||
* enumerate integer values.
|
||||
*/
|
||||
public LongsRef getInt() {
|
||||
return intsRef;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies the internal state from the given enum
|
||||
*/
|
||||
protected void copyFrom(ValuesEnum valuesEnum) {
|
||||
intsRef = valuesEnum.intsRef;
|
||||
floatsRef = valuesEnum.floatsRef;
|
||||
bytesRef = valuesEnum.bytesRef;
|
||||
source = valuesEnum.source;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link AttributeSource} associated with this enum.
|
||||
* <p>
|
||||
* Note: this method might create a new AttributeSource if no
|
||||
* {@link AttributeSource} has been provided during enum creation.
|
||||
*/
|
||||
public AttributeSource attributes() {
|
||||
if (source == null) {
|
||||
source = new AttributeSource();
|
||||
}
|
||||
return source;
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the enum
|
||||
*
|
||||
* @throws IOException
|
||||
* if an {@link IOException} occurs
|
||||
*/
|
||||
public abstract void close() throws IOException;
|
||||
|
||||
/**
|
||||
* Returns an empty {@link ValuesEnum} for the given {@link ValueType}.
|
||||
*/
|
||||
public static ValuesEnum emptyEnum(ValueType type) {
|
||||
return new ValuesEnum(type) {
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}
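With this file removed, per-document access goes through Source instead of an iterator. A minimal sketch of the replacement pattern, assuming only getSource(), getDirectSource() and Source#getBytes(int, BytesRef) as used elsewhere in this patch; the sketch class itself is hypothetical:

import java.io.IOException;

import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.util.BytesRef;

// Sketch: random access by document id replaces the old iterator-style enum.
class RandomAccessSketch {
  void collect(IndexDocValues values, int[] docIDs) throws IOException {
    Source source = values.getSource();      // cached, RAM-resident Source
    // values.getDirectSource() would give the disk-resident variant instead.
    BytesRef scratch = new BytesRef();
    for (int docID : docIDs) {
      source.getBytes(docID, scratch);       // no advance()/nextDoc() ordering required
      // ... consume scratch ...
    }
  }
}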
|
|
@ -20,16 +20,17 @@ package org.apache.lucene.index.values;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
|
||||
import org.apache.lucene.index.values.Bytes.DerefBytesSourceBase;
|
||||
import org.apache.lucene.index.values.Bytes.DerefBytesEnumBase;
|
||||
import org.apache.lucene.index.values.Bytes.BytesSourceBase;
|
||||
import org.apache.lucene.index.values.Bytes.DerefBytesWriterBase;
|
||||
import org.apache.lucene.index.values.DirectSource;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
// Stores variable-length byte[] by deref, ie when two docs
|
||||
// have the same value, they store only 1 byte[] and both
|
||||
|
@ -57,6 +58,7 @@ class VarDerefBytesImpl {
|
|||
public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
|
||||
throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
|
||||
size = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -68,88 +70,82 @@ class VarDerefBytesImpl {
|
|||
// some last docs that we didn't see
|
||||
@Override
|
||||
public void finishInternal(int docCount) throws IOException {
|
||||
fillDefault(docCount);
|
||||
final int size = hash.size();
|
||||
final long[] addresses = new long[size+1];
|
||||
final long[] addresses = new long[size];
|
||||
final IndexOutput datOut = getOrCreateDataOut();
|
||||
int addr = 1;
|
||||
int addr = 0;
|
||||
final BytesRef bytesRef = new BytesRef();
|
||||
for (int i = 0; i < size; i++) {
|
||||
hash.get(i, bytesRef);
|
||||
addresses[i+1] = addr;
|
||||
addresses[i] = addr;
|
||||
addr += writePrefixLength(datOut, bytesRef) + bytesRef.length;
|
||||
datOut.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
|
||||
}
|
||||
|
||||
final IndexOutput idxOut = getOrCreateIndexOut();
|
||||
// write the max address to read directly on source load
|
||||
idxOut.writeLong(addr - 1);
|
||||
writeIndex(idxOut, docCount, addresses[size], addresses, docToEntry);
|
||||
idxOut.writeLong(addr);
|
||||
writeIndex(idxOut, docCount, addresses[addresses.length-1], addresses, docToEntry);
|
||||
}
|
||||
}
|
||||
|
||||
public static class Reader extends BytesReaderBase {
|
||||
public static class VarDerefReader extends BytesReaderBase {
|
||||
private final long totalBytes;
|
||||
Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_START, true, context);
|
||||
VarDerefReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_START, true, context, ValueType.BYTES_VAR_DEREF);
|
||||
totalBytes = idxIn.readLong();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Source load() throws IOException {
|
||||
return new Source(cloneData(), cloneIndex(), totalBytes);
|
||||
return new VarDerefSource(cloneData(), cloneIndex(), totalBytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Source getDirectSource()
|
||||
throws IOException {
|
||||
return new DirectVarDerefSource(cloneData(), cloneIndex(), type());
|
||||
}
|
||||
}
|
||||
|
||||
final static class VarDerefSource extends BytesSourceBase {
|
||||
private final PackedInts.Reader addresses;
|
||||
|
||||
private final static class Source extends DerefBytesSourceBase {
|
||||
|
||||
public Source(IndexInput datIn, IndexInput idxIn, long totalBytes)
|
||||
throws IOException {
|
||||
super(datIn, idxIn, totalBytes, ValueType.BYTES_VAR_DEREF);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getBytes(int docID, BytesRef bytesRef) {
|
||||
long address = addresses.get(docID);
|
||||
bytesRef.length = 0;
|
||||
return address == 0 ? bytesRef : data.fillSliceWithPrefix(bytesRef,
|
||||
--address);
|
||||
}
|
||||
public VarDerefSource(IndexInput datIn, IndexInput idxIn, long totalBytes)
|
||||
throws IOException {
|
||||
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), totalBytes,
|
||||
ValueType.BYTES_VAR_DEREF);
|
||||
addresses = PackedInts.getReader(idxIn);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource source) throws IOException {
|
||||
return new VarDerefBytesEnum(source, cloneData(), cloneIndex());
|
||||
public BytesRef getBytes(int docID, BytesRef bytesRef) {
|
||||
return data.fillSliceWithPrefix(bytesRef,
|
||||
addresses.get(docID));
|
||||
}
|
||||
}
|
||||
|
||||
final static class VarDerefBytesEnum extends DerefBytesEnumBase {
|
||||
|
||||
public VarDerefBytesEnum(AttributeSource source, IndexInput datIn,
|
||||
IndexInput idxIn) throws IOException {
|
||||
super(source, datIn, idxIn, -1, ValueType.BYTES_VAR_DEREF);
|
||||
}
|
||||
|
||||
final static class DirectVarDerefSource extends DirectSource {
|
||||
private final PackedInts.RandomAccessReaderIterator index;
|
||||
|
||||
@Override
|
||||
protected void fill(long address, BytesRef ref) throws IOException {
|
||||
datIn.seek(fp + --address);
|
||||
final byte sizeByte = datIn.readByte();
|
||||
final int size;
|
||||
if ((sizeByte & 128) == 0) {
|
||||
// length is 1 byte
|
||||
size = sizeByte;
|
||||
} else {
|
||||
size = ((sizeByte & 0x7f) << 8) | ((datIn.readByte() & 0xff));
|
||||
}
|
||||
if (ref.bytes.length < size) {
|
||||
ref.grow(size);
|
||||
}
|
||||
ref.length = size;
|
||||
ref.offset = 0;
|
||||
datIn.readBytes(ref.bytes, 0, size);
|
||||
}
|
||||
DirectVarDerefSource(IndexInput data, IndexInput index, ValueType type)
|
||||
throws IOException {
|
||||
super(data, type);
|
||||
this.index = PackedInts.getRandomAccessReaderIterator(index);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.BYTES_VAR_DEREF;
|
||||
protected int position(int docID) throws IOException {
|
||||
data.seek(baseOffset + index.get(docID));
|
||||
final byte sizeByte = data.readByte();
|
||||
if ((sizeByte & 128) == 0) {
|
||||
// length is 1 byte
|
||||
return sizeByte;
|
||||
} else {
|
||||
return ((sizeByte & 0x7f) << 8) | ((data.readByte() & 0xff));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
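Both readers above share the same length-prefix convention: one byte for lengths below 128, otherwise two bytes with the high bit of the first byte set. A standalone sketch of that convention using plain arrays (no Lucene types); the encoder here is simply the inverse of the decode logic shown in fill()/position() and is offered as an assumption of the writer side:

// Sketch of the 1-or-2 byte length prefix stored before each shared byte[] value.
final class PrefixLengthSketch {
  // Returns the number of prefix bytes written (1 or 2); length must be < 2^15.
  static int writePrefix(byte[] out, int offset, int length) {
    if (length < 128) {
      out[offset] = (byte) length;                  // high bit clear: 1-byte prefix
      return 1;
    }
    out[offset] = (byte) (0x80 | (length >>> 8));   // high bit set: 2-byte prefix
    out[offset + 1] = (byte) (length & 0xff);
    return 2;
  }

  // Mirrors the decode in DirectVarDerefSource#position / VarDerefBytesEnum#fill.
  static int readPrefix(byte[] in, int offset) {
    final byte sizeByte = in[offset];
    if ((sizeByte & 128) == 0) {
      return sizeByte;                               // length fit in 1 byte
    }
    return ((sizeByte & 0x7f) << 8) | (in[offset + 1] & 0xff);
  }
}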
|
||||
|
|
|
@ -23,11 +23,11 @@ import java.util.Comparator;
|
|||
import org.apache.lucene.index.values.Bytes.BytesSortedSourceBase;
|
||||
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
|
||||
import org.apache.lucene.index.values.Bytes.DerefBytesWriterBase;
|
||||
import org.apache.lucene.index.values.IndexDocValues.SortedSource;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
@ -39,7 +39,7 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
/**
|
||||
* @lucene.experimental
|
||||
*/
|
||||
class VarSortedBytesImpl {
|
||||
final class VarSortedBytesImpl {
|
||||
|
||||
static final String CODEC_NAME = "VarDerefBytes";
|
||||
static final int VERSION_START = 0;
|
||||
|
@ -52,8 +52,9 @@ class VarSortedBytesImpl {
|
|||
Counter bytesUsed, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
|
||||
this.comp = comp;
|
||||
size = 0;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected void checkSize(BytesRef bytes) {
|
||||
// allow var bytes sizes
|
||||
|
@ -63,11 +64,11 @@ class VarSortedBytesImpl {
|
|||
// some last docs that we didn't see
|
||||
@Override
|
||||
public void finishInternal(int docCount) throws IOException {
|
||||
fillDefault(docCount);
|
||||
final int count = hash.size();
|
||||
final IndexOutput datOut = getOrCreateDataOut();
|
||||
long offset = 0;
|
||||
long lastOffset = 0;
|
||||
final int[] index = new int[count+1];
|
||||
final int[] index = new int[count];
|
||||
final long[] offsets = new long[count];
|
||||
final int[] sortedEntries = hash.sort(comp);
|
||||
// first dump bytes data, recording index & offset as
|
||||
|
@ -75,173 +76,125 @@ class VarSortedBytesImpl {
|
|||
for (int i = 0; i < count; i++) {
|
||||
final int e = sortedEntries[i];
|
||||
offsets[i] = offset;
|
||||
index[e+1] = 1 + i;
|
||||
index[e] = i;
|
||||
|
||||
final BytesRef bytes = hash.get(e, new BytesRef());
|
||||
// TODO: we could prefix code...
|
||||
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
|
||||
lastOffset = offset;
|
||||
offset += bytes.length;
|
||||
}
|
||||
|
||||
final IndexOutput idxOut = getOrCreateIndexOut();
|
||||
// total bytes of data
|
||||
idxOut.writeLong(offset);
|
||||
// write index -- first doc -> 1+ord
|
||||
// write index
|
||||
writeIndex(idxOut, docCount, count, index, docToEntry);
|
||||
// next ord (0-based) -> offset
|
||||
PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count,
|
||||
PackedInts.bitsRequired(lastOffset));
|
||||
PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count+1,
|
||||
PackedInts.bitsRequired(offset));
|
||||
for (int i = 0; i < count; i++) {
|
||||
offsetWriter.add(offsets[i]);
|
||||
}
|
||||
offsetWriter.add(offset);
|
||||
offsetWriter.finish();
|
||||
}
|
||||
}
|
||||
|
||||
public static class Reader extends BytesReaderBase {
|
||||
|
||||
private final Comparator<BytesRef> defaultComp;
|
||||
private final Comparator<BytesRef> comparator;
|
||||
|
||||
Reader(Directory dir, String id, int maxDoc,
|
||||
Comparator<BytesRef> comparator, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_START, true, context);
|
||||
this.defaultComp = comparator;
|
||||
IOContext context, ValueType type, Comparator<BytesRef> comparator)
|
||||
throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_START, true, context, type);
|
||||
this.comparator = comparator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public org.apache.lucene.index.values.IndexDocValues.Source load()
|
||||
throws IOException {
|
||||
return loadSorted(defaultComp);
|
||||
return new VarSortedSource(cloneData(), cloneIndex(), comparator);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedSource loadSorted(Comparator<BytesRef> comp)
|
||||
throws IOException {
|
||||
IndexInput indexIn = cloneIndex();
|
||||
return new Source(cloneData(), indexIn, comp, indexIn.readLong());
|
||||
public Source getDirectSource() throws IOException {
|
||||
return new DirectSortedSource(cloneData(), cloneIndex(), comparator, type());
|
||||
}
|
||||
|
||||
}
|
||||
private static final class VarSortedSource extends BytesSortedSourceBase {
|
||||
private final PackedInts.Reader ordToOffsetIndex; // 0-based
|
||||
private final int valueCount;
|
||||
|
||||
VarSortedSource(IndexInput datIn, IndexInput idxIn,
|
||||
Comparator<BytesRef> comp) throws IOException {
|
||||
super(datIn, idxIn, comp, idxIn.readLong(), ValueType.BYTES_VAR_SORTED);
|
||||
ordToOffsetIndex = PackedInts.getReader(idxIn);
|
||||
valueCount = ordToOffsetIndex.size()-1; // the last value here is just a dummy value to get the length of the last value
|
||||
closeIndexInput();
|
||||
}
|
||||
|
||||
private static class Source extends BytesSortedSourceBase {
|
||||
private final PackedInts.Reader ordToOffsetIndex; // 0-based
|
||||
private final long totBytes;
|
||||
private final int valueCount;
|
||||
@Override
|
||||
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
|
||||
final long offset = ordToOffsetIndex.get(ord);
|
||||
final long nextOffset = ordToOffsetIndex.get(1 + ord);
|
||||
data.fillSlice(bytesRef, offset, (int) (nextOffset - offset));
|
||||
return bytesRef;
|
||||
}
|
||||
|
||||
public Source(IndexInput datIn, IndexInput idxIn,
|
||||
Comparator<BytesRef> comp, long dataLength) throws IOException {
|
||||
super(datIn, idxIn, comp, dataLength, ValueType.BYTES_VAR_SORTED);
|
||||
totBytes = dataLength;
|
||||
ordToOffsetIndex = PackedInts.getReader(idxIn);
|
||||
valueCount = ordToOffsetIndex.size();
|
||||
closeIndexInput();
|
||||
}
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
return valueCount;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getByValue(BytesRef bytes, BytesRef tmpRef) {
|
||||
return binarySearch(bytes, tmpRef, 0, valueCount - 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getValueCount() {
|
||||
return valueCount;
|
||||
}
|
||||
private static final class DirectSortedSource extends SortedSource {
|
||||
private final PackedInts.Reader docToOrdIndex;
|
||||
private final PackedInts.RandomAccessReaderIterator ordToOffsetIndex;
|
||||
private final IndexInput datIn;
|
||||
private final long basePointer;
|
||||
private final int valueCount;
|
||||
|
||||
DirectSortedSource(IndexInput datIn, IndexInput idxIn,
|
||||
Comparator<BytesRef> comparator, ValueType type) throws IOException {
|
||||
super(type, comparator);
|
||||
idxIn.readLong();
|
||||
docToOrdIndex = PackedInts.getReader(idxIn); // read the ords in to prevent too many random disk seeks
|
||||
ordToOffsetIndex = PackedInts.getRandomAccessReaderIterator(idxIn);
|
||||
valueCount = ordToOffsetIndex.size()-1; // the last value here is just a dummy value to get the length of the last value
|
||||
basePointer = datIn.getFilePointer();
|
||||
this.datIn = datIn;
|
||||
}
|
||||
|
||||
// ord is 0-based
|
||||
@Override
|
||||
protected BytesRef deref(int ord, BytesRef bytesRef) {
|
||||
final long nextOffset;
|
||||
if (ord == valueCount - 1) {
|
||||
nextOffset = totBytes;
|
||||
} else {
|
||||
nextOffset = ordToOffsetIndex.get(1 + ord);
|
||||
}
|
||||
@Override
|
||||
public int ord(int docID) {
|
||||
return (int) docToOrdIndex.get(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
|
||||
try {
|
||||
final long offset = ordToOffsetIndex.get(ord);
|
||||
data.fillSlice(bytesRef, offset, (int) (nextOffset - offset));
|
||||
return bytesRef;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource source) throws IOException {
|
||||
return new VarSortedBytesEnum(source, cloneData(), cloneIndex());
|
||||
}
|
||||
|
||||
private static class VarSortedBytesEnum extends ValuesEnum {
|
||||
private PackedInts.Reader docToOrdIndex;
|
||||
private PackedInts.Reader ordToOffsetIndex;
|
||||
private IndexInput idxIn;
|
||||
private IndexInput datIn;
|
||||
private int valueCount;
|
||||
private long totBytes;
|
||||
private int docCount;
|
||||
private int pos = -1;
|
||||
private final long fp;
|
||||
|
||||
protected VarSortedBytesEnum(AttributeSource source, IndexInput datIn,
|
||||
IndexInput idxIn) throws IOException {
|
||||
super(source, ValueType.BYTES_VAR_SORTED);
|
||||
totBytes = idxIn.readLong();
|
||||
// keep that in memory to prevent lots of disk seeks
|
||||
docToOrdIndex = PackedInts.getReader(idxIn);
|
||||
ordToOffsetIndex = PackedInts.getReader(idxIn);
|
||||
valueCount = ordToOffsetIndex.size();
|
||||
docCount = docToOrdIndex.size();
|
||||
fp = datIn.getFilePointer();
|
||||
this.idxIn = idxIn;
|
||||
this.datIn = datIn;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
idxIn.close();
|
||||
datIn.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= docCount) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
int ord;
|
||||
while ((ord = (int) docToOrdIndex.get(target)) == 0) {
|
||||
if (++target >= docCount) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
final long offset = ordToOffsetIndex.get(--ord);
|
||||
final long nextOffset;
|
||||
if (ord == valueCount - 1) {
|
||||
nextOffset = totBytes;
|
||||
} else {
|
||||
nextOffset = ordToOffsetIndex.get(1 + ord);
|
||||
}
|
||||
final long nextOffset = ordToOffsetIndex.next();
|
||||
datIn.seek(basePointer + offset);
|
||||
final int length = (int) (nextOffset - offset);
|
||||
datIn.seek(fp + offset);
|
||||
if (bytesRef.bytes.length < length)
|
||||
if (bytesRef.bytes.length < length) {
|
||||
bytesRef.grow(length);
|
||||
}
|
||||
datIn.readBytes(bytesRef.bytes, 0, length);
|
||||
bytesRef.length = length;
|
||||
bytesRef.offset = 0;
|
||||
return pos = target;
|
||||
}
|
||||
return bytesRef;
|
||||
} catch (IOException ex) {
|
||||
throw new IllegalStateException("failed", ex);
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (pos >= docCount) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
return advance(pos + 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.BYTES_VAR_SORTED;
|
||||
public int getValueCount() {
|
||||
return valueCount;
|
||||
}
|
||||
|
||||
}
|
||||
}
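The writer above now stores count+1 offsets, appending the total byte count as a trailing sentinel, so the reader can compute every value's length as offsets[ord+1] - offsets[ord] without special-casing the last ordinal. A plain-array sketch of just that layout (the PackedInts machinery is left out, and the class is purely illustrative):

// Sketch: value length for ordinal "ord" using a sentinel offset at the end.
final class SentinelOffsetsSketch {
  // offsets.length == valueCount + 1; offsets[valueCount] == total data bytes.
  static int lengthOf(long[] offsets, int ord) {
    return (int) (offsets[ord + 1] - offsets[ord]);
  }

  public static void main(String[] args) {
    // Three values of 3, 1 and 4 bytes packed back to back.
    long[] offsets = {0, 3, 4, 8};            // last entry is the sentinel (total bytes)
    System.out.println(lengthOf(offsets, 2)); // prints 4; no special case for the last ord
  }
}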
|
||||
|
|
|
@ -20,18 +20,19 @@ package org.apache.lucene.index.values;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
|
||||
import org.apache.lucene.index.values.Bytes.BytesSourceBase;
|
||||
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
|
||||
import org.apache.lucene.index.values.Bytes.DerefBytesSourceBase;
|
||||
import org.apache.lucene.index.values.DirectSource;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.ByteBlockPool;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.PagedBytes;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
@ -66,7 +67,7 @@ class VarStraightBytesImpl {
|
|||
}
|
||||
|
||||
// Fills up to but not including this docID
|
||||
private void fill(final int docID) {
|
||||
private void fill(final int docID, final long nextAddress) {
|
||||
if (docID >= docToAddress.length) {
|
||||
int oldSize = docToAddress.length;
|
||||
docToAddress = ArrayUtil.grow(docToAddress, 1 + docID);
|
||||
|
@ -74,7 +75,7 @@ class VarStraightBytesImpl {
|
|||
* RamUsageEstimator.NUM_BYTES_INT);
|
||||
}
|
||||
for (int i = lastDocID + 1; i < docID; i++) {
|
||||
docToAddress[i] = address;
|
||||
docToAddress[i] = nextAddress;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -84,7 +85,7 @@ class VarStraightBytesImpl {
|
|||
if (bytes.length == 0) {
|
||||
return; // default
|
||||
}
|
||||
fill(docID);
|
||||
fill(docID, address);
|
||||
docToAddress[docID] = address;
|
||||
pool.copy(bytes);
|
||||
address += bytes.length;
|
||||
|
@ -97,15 +98,15 @@ class VarStraightBytesImpl {
|
|||
datOut = getOrCreateDataOut();
|
||||
boolean success = false;
|
||||
try {
|
||||
if (state.liveDocs == null && state.reader instanceof Reader) {
|
||||
if (state.liveDocs == null && state.reader instanceof VarStraightReader) {
|
||||
// bulk merge since we don't have any deletes
|
||||
Reader reader = (Reader) state.reader;
|
||||
VarStraightReader reader = (VarStraightReader) state.reader;
|
||||
final int maxDocs = reader.maxDoc;
|
||||
if (maxDocs == 0) {
|
||||
return;
|
||||
}
|
||||
if (lastDocID+1 < state.docBase) {
|
||||
fill(state.docBase);
|
||||
fill(state.docBase, address);
|
||||
lastDocID = state.docBase-1;
|
||||
}
|
||||
final long numDataBytes;
|
||||
|
@ -147,13 +148,14 @@ class VarStraightBytesImpl {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected void mergeDoc(int docID) throws IOException {
|
||||
protected void mergeDoc(int docID, int sourceDoc) throws IOException {
|
||||
assert merge;
|
||||
assert lastDocID < docID;
|
||||
currentMergeSource.getBytes(sourceDoc, bytesRef);
|
||||
if (bytesRef.length == 0) {
|
||||
return; // default
|
||||
}
|
||||
fill(docID);
|
||||
fill(docID, address);
|
||||
datOut.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
|
||||
docToAddress[docID] = address;
|
||||
address += bytesRef.length;
|
||||
|
@ -186,20 +188,21 @@ class VarStraightBytesImpl {
|
|||
try {
|
||||
if (lastDocID == -1) {
|
||||
idxOut.writeVLong(0);
|
||||
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
|
||||
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount+1,
|
||||
PackedInts.bitsRequired(0));
|
||||
for (int i = 0; i < docCount; i++) {
|
||||
for (int i = 0; i < docCount+1; i++) {
|
||||
w.add(0);
|
||||
}
|
||||
w.finish();
|
||||
} else {
|
||||
fill(docCount);
|
||||
fill(docCount, address);
|
||||
idxOut.writeVLong(address);
|
||||
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
|
||||
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount+1,
|
||||
PackedInts.bitsRequired(address));
|
||||
for (int i = 0; i < docCount; i++) {
|
||||
w.add(docToAddress[i]);
|
||||
}
|
||||
w.add(address);
|
||||
w.finish();
|
||||
}
|
||||
success = true;
|
||||
|
@ -220,115 +223,59 @@ class VarStraightBytesImpl {
|
|||
}
|
||||
}
|
||||
|
||||
public static class Reader extends BytesReaderBase {
|
||||
public static class VarStraightReader extends BytesReaderBase {
|
||||
private final int maxDoc;
|
||||
|
||||
Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_START, true, context);
|
||||
VarStraightReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
|
||||
super(dir, id, CODEC_NAME, VERSION_START, true, context, ValueType.BYTES_VAR_STRAIGHT);
|
||||
this.maxDoc = maxDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Source load() throws IOException {
|
||||
return new Source(cloneData(), cloneIndex());
|
||||
}
|
||||
|
||||
private class Source extends DerefBytesSourceBase {
|
||||
|
||||
public Source(IndexInput datIn, IndexInput idxIn) throws IOException {
|
||||
super(datIn, idxIn, idxIn.readVLong(), ValueType.BYTES_VAR_STRAIGHT);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getBytes(int docID, BytesRef bytesRef) {
|
||||
final long address = addresses.get(docID);
|
||||
final int length = docID == maxDoc - 1 ? (int) (totalLengthInBytes - address)
|
||||
: (int) (addresses.get(1 + docID) - address);
|
||||
return data.fillSlice(bytesRef, address, length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
|
||||
return new SourceEnum(attrSource, type(), this, maxDoc()) {
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= numDocs) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
source.getBytes(target, bytesRef);
|
||||
return pos = target;
|
||||
}
|
||||
};
|
||||
}
|
||||
return new VarStraightSource(cloneData(), cloneIndex());
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValuesEnum getEnum(AttributeSource source) throws IOException {
|
||||
return new VarStraightBytesEnum(source, cloneData(), cloneIndex());
|
||||
public Source getDirectSource()
|
||||
throws IOException {
|
||||
return new DirectVarStraightSource(cloneData(), cloneIndex(), type());
|
||||
}
|
||||
}
|
||||
|
||||
private static final class VarStraightSource extends BytesSourceBase {
|
||||
private final PackedInts.Reader addresses;
|
||||
|
||||
private class VarStraightBytesEnum extends ValuesEnum {
|
||||
private final PackedInts.ReaderIterator addresses;
|
||||
private final IndexInput datIn;
|
||||
private final IndexInput idxIn;
|
||||
private final long fp;
|
||||
private final long totBytes;
|
||||
private int pos = -1;
|
||||
private long nextAddress;
|
||||
|
||||
protected VarStraightBytesEnum(AttributeSource source, IndexInput datIn,
|
||||
IndexInput idxIn) throws IOException {
|
||||
super(source, ValueType.BYTES_VAR_STRAIGHT);
|
||||
totBytes = idxIn.readVLong();
|
||||
fp = datIn.getFilePointer();
|
||||
addresses = PackedInts.getReaderIterator(idxIn);
|
||||
this.datIn = datIn;
|
||||
this.idxIn = idxIn;
|
||||
nextAddress = addresses.next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
datIn.close();
|
||||
idxIn.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(final int target) throws IOException {
|
||||
if (target >= maxDoc) {
|
||||
return pos = NO_MORE_DOCS;
|
||||
}
|
||||
final long addr = pos+1 == target ? nextAddress : addresses.advance(target);
|
||||
if (addr == totBytes) { // empty values at the end
|
||||
bytesRef.length = 0;
|
||||
bytesRef.offset = 0;
|
||||
return pos = target;
|
||||
}
|
||||
datIn.seek(fp + addr);
|
||||
final int size = (int) (target == maxDoc - 1 ? totBytes - addr
|
||||
: (nextAddress = addresses.next()) - addr);
|
||||
if (bytesRef.bytes.length < size) {
|
||||
bytesRef.grow(size);
|
||||
}
|
||||
bytesRef.length = size;
|
||||
datIn.readBytes(bytesRef.bytes, 0, size);
|
||||
return pos = target;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(pos + 1);
|
||||
}
|
||||
public VarStraightSource(IndexInput datIn, IndexInput idxIn) throws IOException {
|
||||
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), idxIn.readVLong(),
|
||||
ValueType.BYTES_VAR_STRAIGHT);
|
||||
addresses = PackedInts.getReader(idxIn);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ValueType type() {
|
||||
return ValueType.BYTES_VAR_STRAIGHT;
|
||||
public BytesRef getBytes(int docID, BytesRef bytesRef) {
|
||||
final long address = addresses.get(docID);
|
||||
return data.fillSlice(bytesRef, address,
|
||||
(int) (addresses.get(docID + 1) - address));
|
||||
}
|
||||
}
|
||||
|
||||
public final static class DirectVarStraightSource extends DirectSource {
|
||||
|
||||
private final PackedInts.RandomAccessReaderIterator index;
|
||||
|
||||
DirectVarStraightSource(IndexInput data, IndexInput index, ValueType type)
|
||||
throws IOException {
|
||||
super(data, type);
|
||||
index.readVLong();
|
||||
this.index = PackedInts.getRandomAccessReaderIterator(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int position(int docID) throws IOException {
|
||||
final long offset = index.get(docID);
|
||||
data.seek(baseOffset + offset);
|
||||
return (int) (index.next() - offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
|||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.index.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.index.values.IndexDocValues.Source;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
@ -39,7 +40,7 @@ import org.apache.lucene.util.Counter;
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class Writer extends DocValuesConsumer {
|
||||
|
||||
protected Source currentMergeSource;
|
||||
/**
|
||||
* Creates a new {@link Writer}.
|
||||
*
|
||||
|
@ -99,31 +100,32 @@ public abstract class Writer extends DocValuesConsumer {
|
|||
}
|
||||
|
||||
/**
|
||||
* Records a value from the given document id. The methods implementation
|
||||
* obtains the value for the document id from the last {@link ValuesEnum}
|
||||
* set to {@link #setNextEnum(ValuesEnum)}.
|
||||
* Merges a document with the given <code>docID</code>. The method's
|
||||
* implementation obtains the value for the <i>sourceDoc</i> id from the
|
||||
* current {@link Source} set to <i>setNextMergeSource(Source)</i>.
|
||||
* <p>
|
||||
* This method is used during merging to provide implementation agnostic
|
||||
* default merge implementation.
|
||||
* </p>
|
||||
* <p>
|
||||
* The given document id must be the same document id returned from
|
||||
* {@link ValuesEnum#docID()} when this method is called. All documents IDs
|
||||
* between the given ID and the previously given ID or <tt>0</tt> if the
|
||||
* method is call the first time are filled with default values depending on
|
||||
* the {@link Writer} implementation. The given document ID must always be
|
||||
* greater than the previous ID or <tt>0</tt> if called the first time.
|
||||
* All document IDs between the given ID and the previously given ID, or
* <tt>0</tt> if the method is called for the first time, are filled with default
* values depending on the {@link Writer} implementation. The given document
* ID must always be greater than the previous ID, or <tt>0</tt> if called for
* the first time.
|
||||
*/
|
||||
protected abstract void mergeDoc(int docID) throws IOException;
|
||||
protected abstract void mergeDoc(int docID, int sourceDoc) throws IOException;
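A toy, self-contained sketch of the ordering and gap-filling contract described above; long values stand in for whatever a concrete Writer stores, and the class is not part of this patch:

// Sketch: docIDs must arrive in increasing order, and every id skipped in
// between receives the type's default value.
final class MergeDocContractSketch {
  private long[] docToValue = new long[8];
  private int lastDocID = -1;
  private static final long DEFAULT = 0L;   // the integer types default to 0

  void mergeDoc(int docID, long value) {
    if (docID <= lastDocID) throw new IllegalArgumentException("docIDs must increase");
    if (docID >= docToValue.length) docToValue = java.util.Arrays.copyOf(docToValue, docID + 1);
    for (int d = lastDocID + 1; d < docID; d++) {
      docToValue[d] = DEFAULT;              // fill documents that were never seen
    }
    docToValue[docID] = value;
    lastDocID = docID;
  }
}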
|
||||
|
||||
/**
|
||||
* Sets the next {@link ValuesEnum} to consume values from on calls to
|
||||
* {@link #mergeDoc(int)}
|
||||
* Sets the next {@link Source} to consume values from on calls to
|
||||
* {@link #mergeDoc(int, int)}
|
||||
*
|
||||
* @param valuesEnum
|
||||
* the next {@link ValuesEnum}, this must not be null
|
||||
* @param mergeSource
|
||||
* the next {@link Source}, this must not be null
|
||||
*/
|
||||
protected abstract void setNextEnum(ValuesEnum valuesEnum);
|
||||
protected void setNextMergeSource(Source mergeSource) {
|
||||
currentMergeSource = mergeSource;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finish writing and close any files and resources used by this Writer.
|
||||
|
@ -141,34 +143,20 @@ public abstract class Writer extends DocValuesConsumer {
|
|||
// simply override this and decide if they want to merge
|
||||
// segments using this generic implementation or if a bulk merge is possible
|
||||
// / feasible.
|
||||
final ValuesEnum valEnum = state.reader.getEnum();
|
||||
assert valEnum != null;
|
||||
try {
|
||||
setNextEnum(valEnum); // set the current enum we are working on - the
|
||||
// impl. will get the correct reference for the type
|
||||
// it supports
|
||||
int docID = state.docBase;
|
||||
final Bits liveDocs = state.liveDocs;
|
||||
final int docCount = state.docCount;
|
||||
int currentDocId;
|
||||
if ((currentDocId = valEnum.advance(0)) != ValuesEnum.NO_MORE_DOCS) {
|
||||
for (int i = 0; i < docCount; i++) {
|
||||
if (liveDocs == null || liveDocs.get(i)) {
|
||||
if (currentDocId < i) {
|
||||
if ((currentDocId = valEnum.advance(i)) == ValuesEnum.NO_MORE_DOCS) {
|
||||
break; // advance can jump over default values
|
||||
}
|
||||
}
|
||||
if (currentDocId == i) { // we are on the doc to merge
|
||||
mergeDoc(docID);
|
||||
}
|
||||
++docID;
|
||||
}
|
||||
}
|
||||
final Source source = state.reader.getDirectSource();
|
||||
assert source != null;
|
||||
setNextMergeSource(source); // set the current source we are working on - the
|
||||
// impl. will get the correct reference for the type
|
||||
// it supports
|
||||
int docID = state.docBase;
|
||||
final Bits liveDocs = state.liveDocs;
|
||||
final int docCount = state.docCount;
|
||||
for (int i = 0; i < docCount; i++) {
|
||||
if (liveDocs == null || liveDocs.get(i)) {
|
||||
mergeDoc(docID++, i);
|
||||
}
|
||||
} finally {
|
||||
valEnum.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -182,11 +170,6 @@ public abstract class Writer extends DocValuesConsumer {
|
|||
* the file name id used to create files within the writer.
|
||||
* @param directory
|
||||
* the {@link Directory} to create the files from.
|
||||
* @param comp
|
||||
* a {@link BytesRef} comparator used for {@link Bytes} variants. If
|
||||
* <code>null</code>
|
||||
* {@link BytesRef#getUTF8SortedAsUnicodeComparator()} is used as the
|
||||
* default.
|
||||
* @param bytesUsed
|
||||
* a byte-usage tracking reference
|
||||
* @return a new {@link Writer} instance for the given {@link ValueType}
|
||||
|
@ -205,28 +188,27 @@ public abstract class Writer extends DocValuesConsumer {
|
|||
case VAR_INTS:
|
||||
return Ints.getWriter(directory, id, bytesUsed, type, context);
|
||||
case FLOAT_32:
|
||||
return Floats.getWriter(directory, id, 4, bytesUsed, context);
|
||||
return Floats.getWriter(directory, id, bytesUsed, context, type);
|
||||
case FLOAT_64:
|
||||
return Floats.getWriter(directory, id, 8, bytesUsed, context);
|
||||
return Floats.getWriter(directory, id, bytesUsed, context, type);
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, true,
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, true, comp,
|
||||
bytesUsed, context);
|
||||
case BYTES_FIXED_DEREF:
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, true,
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, true, comp,
|
||||
bytesUsed, context);
|
||||
case BYTES_FIXED_SORTED:
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, true,
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, true, comp,
|
||||
bytesUsed, context);
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, false,
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, false, comp,
|
||||
bytesUsed, context);
|
||||
case BYTES_VAR_DEREF:
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, false,
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, false, comp,
|
||||
bytesUsed, context);
|
||||
case BYTES_VAR_SORTED:
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, false,
|
||||
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, false, comp,
|
||||
bytesUsed, context);
|
||||
|
||||
default:
|
||||
throw new IllegalArgumentException("Unknown Values: " + type);
|
||||
}
|
||||
|
|
|
@ -1,109 +0,0 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Represents double[], as a slice (offset + length) into an existing double[].
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public final class FloatsRef implements Cloneable {
|
||||
public double[] floats;
|
||||
public int offset;
|
||||
public int length;
|
||||
|
||||
public FloatsRef() {
|
||||
}
|
||||
|
||||
public FloatsRef(int capacity) {
|
||||
floats = new double[capacity];
|
||||
}
|
||||
|
||||
public void set(double value) {
|
||||
floats[offset] = value;
|
||||
}
|
||||
|
||||
public double get() {
|
||||
return floats[offset];
|
||||
}
|
||||
|
||||
public FloatsRef(double[] floats, int offset, int length) {
|
||||
this.floats = floats;
|
||||
this.offset = offset;
|
||||
this.length = length;
|
||||
}
|
||||
|
||||
public FloatsRef(FloatsRef other) {
|
||||
copy(other);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object clone() {
|
||||
return new FloatsRef(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 0;
|
||||
final int end = offset + length;
|
||||
for(int i = offset; i < end; i++) {
|
||||
long value = Double.doubleToLongBits(floats[i]);
|
||||
result = prime * result + (int) (value ^ (value >>> 32));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
return other instanceof FloatsRef && this.floatsEquals((FloatsRef) other);
|
||||
}
|
||||
|
||||
public boolean floatsEquals(FloatsRef other) {
|
||||
if (length == other.length) {
|
||||
int otherUpto = other.offset;
|
||||
final double[] otherFloats = other.floats;
|
||||
final int end = offset + length;
|
||||
for(int upto=offset;upto<end;upto++,otherUpto++) {
|
||||
if (floats[upto] != otherFloats[otherUpto]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public void copy(FloatsRef other) {
|
||||
if (floats == null) {
|
||||
floats = new double[other.length];
|
||||
} else {
|
||||
floats = ArrayUtil.grow(floats, other.length);
|
||||
}
|
||||
System.arraycopy(other.floats, other.offset, floats, 0, other.length);
|
||||
length = other.length;
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
public void grow(int newLength) {
|
||||
if (floats.length < newLength) {
|
||||
floats = ArrayUtil.grow(floats, newLength);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,109 +0,0 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Represents long[], as a slice (offset + length) into an existing long[].
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public final class LongsRef implements Cloneable {
|
||||
public long[] ints;
|
||||
public int offset;
|
||||
public int length;
|
||||
|
||||
public LongsRef() {
|
||||
}
|
||||
|
||||
public LongsRef(int capacity) {
|
||||
ints = new long[capacity];
|
||||
}
|
||||
|
||||
public LongsRef(long[] ints, int offset, int length) {
|
||||
this.ints = ints;
|
||||
this.offset = offset;
|
||||
this.length = length;
|
||||
}
|
||||
|
||||
public LongsRef(LongsRef other) {
|
||||
copy(other);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object clone() {
|
||||
return new LongsRef(this);
|
||||
}
|
||||
|
||||
public void set(long value) {
|
||||
ints[offset] = value;
|
||||
}
|
||||
|
||||
public long get() {
|
||||
return ints[offset];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 0;
|
||||
final int end = offset + length;
|
||||
for (int i = offset; i < end; i++) {
|
||||
long value = ints[i];
|
||||
result = prime * result + (int) (value ^ (value >>> 32));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
return this.intsEquals((LongsRef) other);
|
||||
}
|
||||
|
||||
public boolean intsEquals(LongsRef other) {
|
||||
if (length == other.length) {
|
||||
int otherUpto = other.offset;
|
||||
final long[] otherInts = other.ints;
|
||||
final int end = offset + length;
|
||||
for (int upto = offset; upto < end; upto++, otherUpto++) {
|
||||
if (ints[upto] != otherInts[otherUpto]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public void copy(LongsRef other) {
|
||||
if (ints == null) {
|
||||
ints = new long[other.length];
|
||||
} else {
|
||||
ints = ArrayUtil.grow(ints, other.length);
|
||||
}
|
||||
System.arraycopy(other.ints, other.offset, ints, 0, other.length);
|
||||
length = other.length;
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
public void grow(int newLength) {
|
||||
if (ints.length < newLength) {
|
||||
ints = ArrayUtil.grow(ints, newLength);
|
||||
}
|
||||
}
|
||||
}
@ -85,6 +85,14 @@ public class PackedInts {
    long advance(int ord) throws IOException;
  }

  public static interface RandomAccessReaderIterator extends ReaderIterator {
    /**
     * @param index the position of the wanted value.
     * @return the value at the stated index.
     */
    long get(int index) throws IOException;
  }

  /**
   * A packed integer array that can be modified.
   * @lucene.internal

@ -195,6 +203,17 @@ public class PackedInts {
   * @lucene.internal
   */
  public static ReaderIterator getReaderIterator(IndexInput in) throws IOException {
    return getRandomAccessReaderIterator(in);
  }

  /**
   * Retrieve PackedInts as a {@link RandomAccessReaderIterator}
   * @param in positioned at the beginning of a stored packed int structure.
   * @return an iterator to access the values
   * @throws IOException if the structure could not be retrieved.
   * @lucene.internal
   */
  public static RandomAccessReaderIterator getRandomAccessReaderIterator(IndexInput in) throws IOException {
    CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
    final int bitsPerValue = in.readVInt();
    assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue;
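A hedged sketch of how a caller might use the new random-access entry point; the helper class, the file name "packed.bin", and the assumption that this file was previously written into the same Directory with PackedInts' own writer (and that PackedInts lives in the usual org.apache.lucene.util.packed package) are illustrative, not part of this change.

import java.io.IOException;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.packed.PackedInts;

public class RandomAccessPackedDemo {
  // Reads the value stored at the given ordinal without iterating from the start.
  static long readOrdinal(Directory dir, int ord) throws IOException {
    IndexInput in = dir.openInput("packed.bin", IOContext.DEFAULT);
    try {
      PackedInts.RandomAccessReaderIterator it =
          PackedInts.getRandomAccessReaderIterator(in);
      return it.get(ord);  // seeks directly to the requested position
    } finally {
      in.close();
    }
  }
}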
@ -21,13 +21,15 @@ import org.apache.lucene.store.IndexInput;

import java.io.IOException;

final class PackedReaderIterator implements PackedInts.ReaderIterator {
final class PackedReaderIterator implements PackedInts.RandomAccessReaderIterator {
  private long pending;
  private int pendingBitsLeft;
  private final IndexInput in;
  private final int bitsPerValue;
  private final int valueCount;
  private int position = -1;
  private long currentValue;
  private final long startPointer;

  // masks[n-1] masks for bottom n bits
  private final long[] masks;

@ -39,6 +41,7 @@ final class PackedReaderIterator implements PackedInts.ReaderIterator {
    this.bitsPerValue = bitsPerValue;
    this.in = in;
    startPointer = in.getFilePointer();
    masks = new long[bitsPerValue];

    long v = 1;

@ -76,7 +79,7 @@ final class PackedReaderIterator implements PackedInts.ReaderIterator {
    }

    ++position;
    return result;
    return currentValue = result;
  }

  public void close() throws IOException {

@ -106,6 +109,26 @@ final class PackedReaderIterator implements PackedInts.ReaderIterator {
      pendingBitsLeft = 64 - (int)(skip % 64);
    }
    position = ord-1;
    return next();
    return currentValue = next();
  }

  @Override
  public long get(int index) throws IOException {
    assert index < valueCount : "ord must be less than valueCount";
    if (index < position) {
      pendingBitsLeft = 0;
      final long bitsToSkip = (((long) bitsPerValue) * (long) index);
      final long skip = bitsToSkip - pendingBitsLeft;
      final long closestByte = (skip >> 6) << 3;
      in.seek(startPointer + closestByte);
      pending = in.readLong();
      pendingBitsLeft = 64 - (int) (skip % 64);
      position = index - 1;
      return currentValue = next();
    } else if (index == position) {
      return currentValue;
    }
    return advance(index);
  }
}
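To make the seek arithmetic in get(int) concrete, a standalone sketch with illustrative numbers (bitsPerValue = 5, index = 100); the class name is hypothetical and the variables mirror the method above.

public class PackedSeekMathDemo {
  public static void main(String[] args) {
    final int bitsPerValue = 5;
    final int index = 100;
    final long bitsToSkip = (long) bitsPerValue * index;  // 500: first bit of value #100
    final long skip = bitsToSkip;                         // pendingBitsLeft was just reset to 0
    final long closestByte = (skip >> 6) << 3;            // 56: byte offset of the 64-bit word holding bit 500
    final int pendingBitsLeft = 64 - (int) (skip % 64);   // 12: bits of that word from bit 500 onwards
    System.out.println("seek to byte " + closestByte + ", " + pendingBitsLeft + " pending bits");
  }
}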
@ -176,8 +176,8 @@ public class RandomIndexWriter implements Closeable {
    IndexDocValuesField docValuesField = new IndexDocValuesField(name);
    switch (type) {
    case BYTES_FIXED_DEREF:
    case BYTES_FIXED_SORTED:
    case BYTES_FIXED_STRAIGHT:
    case BYTES_FIXED_SORTED:
      final String randomUnicodeString = _TestUtil.randomUnicodeString(random, fixedBytesLength);
      BytesRef fixedRef = new BytesRef(randomUnicodeString);
      if (fixedRef.length > fixedBytesLength) {

@ -189,8 +189,8 @@ public class RandomIndexWriter implements Closeable {
      docValuesField.setBytes(fixedRef, type);
      break;
    case BYTES_VAR_DEREF:
    case BYTES_VAR_SORTED:
    case BYTES_VAR_STRAIGHT:
    case BYTES_VAR_SORTED:
      BytesRef ref = new BytesRef(_TestUtil.randomUnicodeString(random, 200));
      docValuesField.setBytes(ref, type);
      break;
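For reference, a hedged sketch of building the same kind of field outside the random test writer; the field name, the payload, and the BYTES_FIXED_STRAIGHT choice are illustrative, and indexing/storing options are omitted.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.IndexDocValuesField;
import org.apache.lucene.index.values.ValueType;
import org.apache.lucene.util.BytesRef;

public class DocValuesFieldDemo {
  static Document makeDoc() {
    Document doc = new Document();
    IndexDocValuesField dv = new IndexDocValuesField("dv_bytes");
    // One fixed-length byte payload per document.
    dv.setBytes(new BytesRef("payload"), ValueType.BYTES_FIXED_STRAIGHT);
    doc.add(dv);
    return doc;
  }
}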
@ -25,14 +25,12 @@ import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.FloatsRef;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;

public class TestDocValues extends LuceneTestCase {

  private static final Comparator<BytesRef> COMP = BytesRef.getUTF8SortedAsUnicodeComparator();
  // TODO -- for sorted test, do our own Sort of the
  // values and verify it's identical

@ -45,23 +43,20 @@ public class TestDocValues extends LuceneTestCase {
    runTestBytes(Bytes.Mode.DEREF, true);
    runTestBytes(Bytes.Mode.DEREF, false);
  }

  public void testBytesSorted() throws IOException {
    runTestBytes(Bytes.Mode.SORTED, true);
    runTestBytes(Bytes.Mode.SORTED, false);
  }

  public void runTestBytes(final Bytes.Mode mode, final boolean fixedSize)
      throws IOException {

    final BytesRef bytesRef = new BytesRef();

    final Comparator<BytesRef> comp = mode == Bytes.Mode.SORTED ? BytesRef
        .getUTF8SortedAsUnicodeComparator() : null;

    Directory dir = newDirectory();
    final Counter trackBytes = Counter.newCounter();
    Writer w = Bytes.getWriter(dir, "test", mode, comp, fixedSize, trackBytes, newIOContext(random));
    Writer w = Bytes.getWriter(dir, "test", mode, fixedSize, COMP, trackBytes, newIOContext(random));
    int maxDoc = 220;
    final String[] values = new String[maxDoc];
    final int fixedLength = 1 + atLeast(50);

@ -81,24 +76,7 @@ public class TestDocValues extends LuceneTestCase {
    w.finish(maxDoc);
    assertEquals(0, trackBytes.get());

    IndexDocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc, comp, newIOContext(random));
    for (int iter = 0; iter < 2; iter++) {
      ValuesEnum bytesEnum = getEnum(r);
      assertNotNull("enum is null", bytesEnum);
      BytesRef ref = bytesEnum.bytes();

      for (int i = 0; i < 2; i++) {
        final int idx = 2 * i;
        assertEquals("doc: " + idx, idx, bytesEnum.advance(idx));
        String utf8String = ref.utf8ToString();
        assertEquals("doc: " + idx + " lenLeft: " + values[idx].length()
            + " lenRight: " + utf8String.length(), values[idx], utf8String);
      }
      assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc));
      assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc + 1));

      bytesEnum.close();
    }
    IndexDocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc, COMP, newIOContext(random));

    // Verify we can load source twice:
    for (int iter = 0; iter < 2; iter++) {

@ -106,7 +84,7 @@ public class TestDocValues extends LuceneTestCase {
      IndexDocValues.SortedSource ss;
      if (mode == Bytes.Mode.SORTED) {
        // default is unicode so we can simply pass null here
        s = ss = getSortedSource(r, random.nextBoolean() ? comp : null);
        s = ss = getSortedSource(r);
      } else {
        s = getSource(r);
        ss = null;

@ -121,7 +99,7 @@ public class TestDocValues extends LuceneTestCase {
          assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx),
              bytesRef).utf8ToString());
          int ord = ss
              .getByValue(new BytesRef(values[idx]));
              .getByValue(new BytesRef(values[idx]), new BytesRef());
          assertTrue(ord >= 0);
          assertEquals(ss.ord(idx), ord);
        }

@ -129,10 +107,10 @@ public class TestDocValues extends LuceneTestCase {

        // Lookup random strings:
        if (mode == Bytes.Mode.SORTED) {
          final int numValues = ss.getValueCount();
          final int valueCount = ss.getValueCount();
          for (int i = 0; i < 1000; i++) {
            BytesRef bytesValue = new BytesRef(_TestUtil.randomFixedByteLengthUnicodeString(random, fixedSize? fixedLength : 1 + random.nextInt(39)));
            int ord = ss.getByValue(bytesValue);
            int ord = ss.getByValue(bytesValue, new BytesRef());
            if (ord >= 0) {
              assertTrue(bytesValue
                  .bytesEquals(ss.getByOrd(ord, bytesRef)));

@ -151,22 +129,23 @@ public class TestDocValues extends LuceneTestCase {
                final BytesRef firstRef = ss.getByOrd(1, bytesRef);
                // random string was before our first
                assertTrue(firstRef.compareTo(bytesValue) > 0);
              } else if (insertIndex == numValues) {
                final BytesRef lastRef = ss.getByOrd(numValues-1, bytesRef);
              } else if (insertIndex == valueCount) {
                final BytesRef lastRef = ss.getByOrd(valueCount-1, bytesRef);
                // random string was after our last
                assertTrue(lastRef.compareTo(bytesValue) < 0);
              } else {
                final BytesRef before = (BytesRef) ss.getByOrd(insertIndex-1, bytesRef)
                    .clone();
                BytesRef after = ss.getByOrd(insertIndex, bytesRef);
                assertTrue(comp.compare(before, bytesValue) < 0);
                assertTrue(comp.compare(bytesValue, after) < 0);
                assertTrue(COMP.compare(before, bytesValue) < 0);
                assertTrue(COMP.compare(bytesValue, after) < 0);
              }
            }
          }
        }
      }

    r.close();
    dir.close();
  }

@ -194,14 +173,6 @@ public class TestDocValues extends LuceneTestCase {
          expectedTypes[i], source.type());
      assertEquals(minMax[i][0], source.getInt(0));
      assertEquals(minMax[i][1], source.getInt(1));
      ValuesEnum iEnum = getEnum(r);
      assertEquals(i + " with min: " + minMax[i][0] + " max: " + minMax[i][1],
          expectedTypes[i], iEnum.type());
      assertEquals(0, iEnum.nextDoc());
      assertEquals(minMax[i][0], iEnum.intsRef.get());
      assertEquals(1, iEnum.nextDoc());
      assertEquals(minMax[i][1], iEnum.intsRef.get());
      assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.nextDoc());

      r.close();
      dir.close();

@ -308,12 +279,12 @@ public class TestDocValues extends LuceneTestCase {
    float[] sourceArray = new float[] {1,2,3};
    Directory dir = newDirectory();
    final Counter trackBytes = Counter.newCounter();
    Writer w = Floats.getWriter(dir, "test", 4, trackBytes, newIOContext(random));
    Writer w = Floats.getWriter(dir, "test", trackBytes, newIOContext(random), ValueType.FLOAT_32);
    for (int i = 0; i < sourceArray.length; i++) {
      w.add(i, sourceArray[i]);
    }
    w.finish(sourceArray.length);
    IndexDocValues r = Floats.getValues(dir, "test", 3, newIOContext(random));
    IndexDocValues r = Floats.getValues(dir, "test", 3, newIOContext(random), ValueType.FLOAT_32);
    Source source = r.getSource();
    assertTrue(source.hasArray());
    float[] loaded = ((float[])source.getArray());

@ -329,12 +300,12 @@ public class TestDocValues extends LuceneTestCase {
    double[] sourceArray = new double[] {1,2,3};
    Directory dir = newDirectory();
    final Counter trackBytes = Counter.newCounter();
    Writer w = Floats.getWriter(dir, "test", 8, trackBytes, newIOContext(random));
    Writer w = Floats.getWriter(dir, "test", trackBytes, newIOContext(random), ValueType.FLOAT_64);
    for (int i = 0; i < sourceArray.length; i++) {
      w.add(i, sourceArray[i]);
    }
    w.finish(sourceArray.length);
    IndexDocValues r = Floats.getValues(dir, "test", 3, newIOContext(random));
    IndexDocValues r = Floats.getValues(dir, "test", 3, newIOContext(random), ValueType.FLOAT_64);
    Source source = r.getSource();
    assertTrue(source.hasArray());
    double[] loaded = ((double[])source.getArray());

@ -373,54 +344,23 @@ public class TestDocValues extends LuceneTestCase {
        }
      }

      for (int iter = 0; iter < 2; iter++) {
        ValuesEnum iEnum = getEnum(r);
        assertEquals(type, iEnum.type());
        LongsRef ints = iEnum.getInt();
        for (int i = 0; i < NUM_VALUES + additionalDocs; i++) {
          assertEquals(i, iEnum.nextDoc());
          if (i < NUM_VALUES) {
            assertEquals(values[i], ints.get());
          } else {
            assertEquals(0, ints.get());
          }
        }
        assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.nextDoc());
        iEnum.close();
      }

      for (int iter = 0; iter < 2; iter++) {
        ValuesEnum iEnum = getEnum(r);
        assertEquals(type, iEnum.type());
        LongsRef ints = iEnum.getInt();
        for (int i = 0; i < NUM_VALUES + additionalDocs; i += 1 + random.nextInt(25)) {
          assertEquals(i, iEnum.advance(i));
          if (i < NUM_VALUES) {
            assertEquals(values[i], ints.get());
          } else {
            assertEquals(0, ints.get());
          }
        }
        assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.advance(NUM_VALUES + additionalDocs));
        iEnum.close();
      }
      r.close();
      dir.close();
    }
  }

  public void testFloats4() throws IOException {
    runTestFloats(4, 0.00001);
    runTestFloats(ValueType.FLOAT_32, 0.00001);
  }

  private void runTestFloats(int precision, double delta) throws IOException {
  private void runTestFloats(ValueType type, double delta) throws IOException {
    Directory dir = newDirectory();
    final Counter trackBytes = Counter.newCounter();
    Writer w = Floats.getWriter(dir, "test", precision, trackBytes, newIOContext(random));
    Writer w = Floats.getWriter(dir, "test", trackBytes, newIOContext(random), type);
    final int NUM_VALUES = 777 + random.nextInt(777);
    final double[] values = new double[NUM_VALUES];
    for (int i = 0; i < NUM_VALUES; i++) {
      final double v = precision == 4 ? random.nextFloat() : random
      final double v = type == ValueType.FLOAT_32 ? random.nextFloat() : random
          .nextDouble();
      values[i] = v;
      w.add(i, v);

@ -429,64 +369,38 @@ public class TestDocValues extends LuceneTestCase {
    w.finish(NUM_VALUES + additionalValues);
    assertEquals(0, trackBytes.get());

    IndexDocValues r = Floats.getValues(dir, "test", NUM_VALUES + additionalValues, newIOContext(random));
    IndexDocValues r = Floats.getValues(dir, "test", NUM_VALUES + additionalValues, newIOContext(random), type);
    for (int iter = 0; iter < 2; iter++) {
      Source s = getSource(r);
      for (int i = 0; i < NUM_VALUES; i++) {
        assertEquals(values[i], s.getFloat(i), 0.0f);
        assertEquals("" + i, values[i], s.getFloat(i), 0.0f);
      }
    }

    for (int iter = 0; iter < 2; iter++) {
      ValuesEnum fEnum = getEnum(r);
      FloatsRef floats = fEnum.getFloat();
      for (int i = 0; i < NUM_VALUES + additionalValues; i++) {
        assertEquals(i, fEnum.nextDoc());
        if (i < NUM_VALUES) {
          assertEquals(values[i], floats.get(), delta);
        } else {
          assertEquals(0.0d, floats.get(), delta);
        }
      }
      assertEquals(ValuesEnum.NO_MORE_DOCS, fEnum.nextDoc());
      fEnum.close();
    }
    for (int iter = 0; iter < 2; iter++) {
      ValuesEnum fEnum = getEnum(r);
      FloatsRef floats = fEnum.getFloat();
      for (int i = 0; i < NUM_VALUES + additionalValues; i += 1 + random.nextInt(25)) {
        assertEquals(i, fEnum.advance(i));
        if (i < NUM_VALUES) {
          assertEquals(values[i], floats.get(), delta);
        } else {
          assertEquals(0.0d, floats.get(), delta);
        }
      }
      assertEquals(ValuesEnum.NO_MORE_DOCS, fEnum.advance(NUM_VALUES + additionalValues));
      fEnum.close();
    }

    r.close();
    dir.close();
  }

  public void testFloats8() throws IOException {
    runTestFloats(8, 0.0);
    runTestFloats(ValueType.FLOAT_64, 0.0);
  }

  private ValuesEnum getEnum(IndexDocValues values) throws IOException {
    return random.nextBoolean() ? values.getEnum() : getSource(values).getEnum();
  }

  private Source getSource(IndexDocValues values) throws IOException {
    // getSource uses cache internally
    return random.nextBoolean() ? values.load() : values.getSource();
    switch(random.nextInt(5)) {
    case 3:
      return values.load();
    case 2:
      return values.getDirectSource();
    case 1:
      return values.getSource();
    default:
      return values.getSource();
    }
  }

  private SortedSource getSortedSource(IndexDocValues values,
      Comparator<BytesRef> comparator) throws IOException {
    // getSortedSource uses cache internally
    return random.nextBoolean() ? values.loadSorted(comparator) : values
        .getSortedSorted(comparator);

  private SortedSource getSortedSource(IndexDocValues values) throws IOException {
    return getSource(values).asSortedSource();
  }

}
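The getSource helper above picks randomly among values.load(), values.getSource(), and values.getDirectSource(), all of which now hand back a Source that is read per document ID. A hedged sketch of that access pattern as a plain consumer follows; the helper class, the ramResident flag, and the way the IndexDocValues instance would be obtained from a reader are illustrative assumptions.

import java.io.IOException;

import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.util.BytesRef;

public class SourceAccessDemo {
  static void dump(IndexDocValues values, int maxDoc, boolean ramResident) throws IOException {
    // getSource() loads (and caches) the values; getDirectSource() reads them
    // from the underlying file on each access.
    Source source = ramResident ? values.getSource() : values.getDirectSource();
    BytesRef scratch = new BytesRef();
    for (int docID = 0; docID < maxDoc; docID++) {
      switch (source.type()) {
      case FIXED_INTS_64:
        System.out.println(docID + " -> " + source.getInt(docID));
        break;
      case FLOAT_64:
        System.out.println(docID + " -> " + source.getFloat(docID));
        break;
      default:
        System.out.println(docID + " -> " + source.getBytes(docID, scratch).utf8ToString());
      }
    }
  }
}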
@ -47,8 +47,6 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.FloatsRef;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.junit.Before;

@ -136,7 +134,6 @@ public class TestDocValuesIndexing extends LuceneTestCase {
    Collections.shuffle(values, random);
    ValueType first = values.get(0);
    ValueType second = values.get(1);
    String msg = "[first=" + first.name() + ", second=" + second.name() + "]";
    // index first index
    Directory d_1 = newDirectory();
    IndexWriter w_1 = new IndexWriter(d_1, writerConfig(random.nextBoolean()));

@ -171,36 +168,66 @@ public class TestDocValuesIndexing extends LuceneTestCase {
    // check values

    IndexReader merged = IndexReader.open(w, true);
    ValuesEnum vE_1 = getValuesEnum(getDocValues(r_1, first.name()));
    ValuesEnum vE_2 = getValuesEnum(getDocValues(r_2, second.name()));
    ValuesEnum vE_1_merged = getValuesEnum(getDocValues(merged, first.name()));
    ValuesEnum vE_2_merged = getValuesEnum(getDocValues(merged, second
    Source source_1 = getSource(getDocValues(r_1, first.name()));
    Source source_2 = getSource(getDocValues(r_2, second.name()));
    Source source_1_merged = getSource(getDocValues(merged, first.name()));
    Source source_2_merged = getSource(getDocValues(merged, second
        .name()));
    switch (second) { // these variants don't advance over missing values
    case BYTES_FIXED_STRAIGHT:
    case BYTES_VAR_STRAIGHT:
    case FLOAT_32:
    case FLOAT_64:
    case VAR_INTS:
    case FIXED_INTS_16:
    case FIXED_INTS_32:
    case FIXED_INTS_64:
    case FIXED_INTS_8:
      assertEquals(msg, valuesPerIndex-1, vE_2_merged.advance(valuesPerIndex-1));
    for (int i = 0; i < r_1.maxDoc(); i++) {
      switch (first) {
      case BYTES_FIXED_DEREF:
      case BYTES_FIXED_STRAIGHT:
      case BYTES_VAR_DEREF:
      case BYTES_VAR_STRAIGHT:
      case BYTES_FIXED_SORTED:
      case BYTES_VAR_SORTED:
        assertEquals(source_1.getBytes(i, new BytesRef()),
            source_1_merged.getBytes(i, new BytesRef()));
        break;
      case FIXED_INTS_16:
      case FIXED_INTS_32:
      case FIXED_INTS_64:
      case FIXED_INTS_8:
      case VAR_INTS:
        assertEquals(source_1.getInt(i), source_1_merged.getInt(i));
        break;
      case FLOAT_32:
      case FLOAT_64:
        assertEquals(source_1.getFloat(i), source_1_merged.getFloat(i), 0.0d);
        break;
      default:
        fail("unknown " + first);
      }
    }

    for (int i = 0; i < valuesPerIndex; i++) {
      assertEquals(msg, i, vE_1.nextDoc());
      assertEquals(msg, i, vE_1_merged.nextDoc());

      assertEquals(msg, i, vE_2.nextDoc());
      assertEquals(msg, i + valuesPerIndex, vE_2_merged.nextDoc());
    for (int i = r_1.maxDoc(); i < merged.maxDoc(); i++) {
      switch (second) {
      case BYTES_FIXED_DEREF:
      case BYTES_FIXED_STRAIGHT:
      case BYTES_VAR_DEREF:
      case BYTES_VAR_STRAIGHT:
      case BYTES_FIXED_SORTED:
      case BYTES_VAR_SORTED:
        assertEquals(source_2.getBytes(i - r_1.maxDoc(), new BytesRef()),
            source_2_merged.getBytes(i, new BytesRef()));
        break;
      case FIXED_INTS_16:
      case FIXED_INTS_32:
      case FIXED_INTS_64:
      case FIXED_INTS_8:
      case VAR_INTS:
        assertEquals(source_2.getInt(i - r_1.maxDoc()),
            source_2_merged.getInt(i));
        break;
      case FLOAT_32:
      case FLOAT_64:
        assertEquals(source_2.getFloat(i - r_1.maxDoc()),
            source_2_merged.getFloat(i), 0.0d);
        break;
      default:
        fail("unknown " + first);
      }
    }
    assertEquals(msg, ValuesEnum.NO_MORE_DOCS, vE_1.nextDoc());
    assertEquals(msg, ValuesEnum.NO_MORE_DOCS, vE_2.nextDoc());
    assertEquals(msg, ValuesEnum.NO_MORE_DOCS, vE_1_merged.advance(valuesPerIndex*2));
    assertEquals(msg, ValuesEnum.NO_MORE_DOCS, vE_2_merged.nextDoc());

    // close resources
    r_1.close();
    r_2.close();

@ -260,22 +287,12 @@ public class TestDocValuesIndexing extends LuceneTestCase {
          assertEquals("index " + i, 0, value);
        }

        ValuesEnum intsEnum = getValuesEnum(intsReader);
        assertTrue(intsEnum.advance(base) >= base);

        intsEnum = getValuesEnum(intsReader);
        LongsRef enumRef = intsEnum.getInt();

        int expected = 0;
        for (int i = base; i < r.numDocs(); i++, expected++) {
          while (deleted.get(expected)) {
            expected++;
          }
          assertEquals("advance failed at index: " + i + " of " + r.numDocs()
              + " docs", i, intsEnum.advance(i));
          assertEquals(val + " mod: " + mod + " index: " + i, expected%mod, ints.getInt(i));
          assertEquals(expected%mod, enumRef.get());

        }
      }
      break;

@ -289,20 +306,11 @@ public class TestDocValuesIndexing extends LuceneTestCase {
          assertEquals(val + " failed for doc: " + i + " base: " + base,
              0.0d, value, 0.0d);
        }
        ValuesEnum floatEnum = getValuesEnum(floatReader);
        assertTrue(floatEnum.advance(base) >= base);

        floatEnum = getValuesEnum(floatReader);
        FloatsRef enumRef = floatEnum.getFloat();
        int expected = 0;
        for (int i = base; i < r.numDocs(); i++, expected++) {
          while (deleted.get(expected)) {
            expected++;
          }
          assertEquals("advance failed at index: " + i + " of " + r.numDocs()
              + " docs base:" + base, i, floatEnum.advance(i));
          assertEquals(floatEnum.getClass() + " index " + i, 2.0 * expected,
              enumRef.get(), 0.00001);
          assertEquals("index " + i, 2.0 * expected, floats.getFloat(i),
              0.00001);
        }

@ -320,7 +328,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
    w.close();
    d.close();
  }

  public void runTestIndexBytes(IndexWriterConfig cfg, boolean withDeletions)
      throws CorruptIndexException, LockObtainFailedException, IOException {
    final Directory d = newDirectory();

@ -353,6 +361,8 @@ public class TestDocValuesIndexing extends LuceneTestCase {
        switch (byteIndexValue) {
        case BYTES_VAR_STRAIGHT:
        case BYTES_FIXED_STRAIGHT:
        case BYTES_FIXED_DEREF:
        case BYTES_FIXED_SORTED:
          // fixed straight returns bytesref with zero bytes all of fixed
          // length
          assertNotNull("expected none null - " + msg, br);

@ -365,23 +375,13 @@ public class TestDocValuesIndexing extends LuceneTestCase {
            }
          }
          break;
        case BYTES_VAR_SORTED:
        case BYTES_FIXED_SORTED:
        case BYTES_VAR_DEREF:
        case BYTES_FIXED_DEREF:
        default:
          assertNotNull("expected none null - " + msg, br);
          assertEquals(0, br.length);
          assertEquals(byteIndexValue + "", 0, br.length);
          // make sure we advance at least until base
          ValuesEnum bytesEnum = getValuesEnum(bytesReader);
          final int advancedTo = bytesEnum.advance(0);
          assertTrue(byteIndexValue.name() + " advanced failed base:" + base
              + " advancedTo: " + advancedTo, base <= advancedTo);
        }
      }

      ValuesEnum bytesEnum = getValuesEnum(bytesReader);
      final BytesRef enumRef = bytesEnum.bytes();
      // test the actual doc values added in this iteration
      assertEquals(base + numRemainingValues, r.numDocs());
      int v = 0;

@ -393,17 +393,8 @@ public class TestDocValuesIndexing extends LuceneTestCase {
          upto += bytesSize;
        }
        BytesRef br = bytes.getBytes(i, new BytesRef());
        if (bytesEnum.docID() != i) {
          assertEquals("seek failed for index " + i + " " + msg, i, bytesEnum
              .advance(i));
        }
        assertTrue(msg, br.length > 0);
        for (int j = 0; j < br.length; j++, upto++) {
          assertTrue(" enumRef not initialized " + msg,
              enumRef.bytes.length > 0);
          assertEquals(
              "EnumRef Byte at index " + j + " doesn't match - " + msg, upto,
              enumRef.bytes[enumRef.offset + j]);
          if (!(br.bytes.length > br.offset + j))
            br = bytes.getBytes(i, new BytesRef());
          assertTrue("BytesRef index exceeded [" + msg + "] offset: "

@ -446,33 +437,23 @@ public class TestDocValuesIndexing extends LuceneTestCase {
  }

  private Source getSource(IndexDocValues values) throws IOException {
    Source source;
    if (random.nextInt(10) == 0) {
      source = values.load();
    } else {
      // getSource uses cache internally
      source = values.getSource();
    // getSource uses cache internally
    switch(random.nextInt(5)) {
    case 3:
      return values.load();
    case 2:
      return values.getDirectSource();
    case 1:
      return values.getSource();
    default:
      return values.getSource();
    }
    assertNotNull(source);
    return source;
  }

  private ValuesEnum getValuesEnum(IndexDocValues values) throws IOException {
    ValuesEnum valuesEnum;
    if (!(values instanceof MultiIndexDocValues) && random.nextInt(10) == 0) {
      // TODO not supported by MultiDocValues yet!
      valuesEnum = getSource(values).getEnum();
    } else {
      valuesEnum = values.getEnum();

    }
    assertNotNull(valuesEnum);
    return valuesEnum;
  }

  private static EnumSet<ValueType> BYTES = EnumSet.of(ValueType.BYTES_FIXED_DEREF,
      ValueType.BYTES_FIXED_SORTED, ValueType.BYTES_FIXED_STRAIGHT, ValueType.BYTES_VAR_DEREF,
      ValueType.BYTES_VAR_SORTED, ValueType.BYTES_VAR_STRAIGHT);
      ValueType.BYTES_FIXED_STRAIGHT, ValueType.BYTES_VAR_DEREF,
      ValueType.BYTES_VAR_STRAIGHT, ValueType.BYTES_FIXED_SORTED, ValueType.BYTES_VAR_SORTED);

  private static EnumSet<ValueType> NUMERICS = EnumSet.of(ValueType.VAR_INTS,
      ValueType.FIXED_INTS_16, ValueType.FIXED_INTS_32,
@ -115,12 +115,38 @@ public class TestPackedInts extends LuceneTestCase {
          assertEquals(fp, in.getFilePointer());
          in.close();
        }

        { // test reader iterator get
          IndexInput in = d.openInput("out.bin", newIOContext(random));
          PackedInts.RandomAccessReaderIterator intsEnum = PackedInts.getRandomAccessReaderIterator(in);
          for (int i = 0; i < valueCount; i++) {
            final String msg = "index=" + i + " ceil=" + ceil + " valueCount="
                + valueCount + " nbits=" + nbits + " for "
                + intsEnum.getClass().getSimpleName();
            final int ord = random.nextInt(valueCount);
            long seek = intsEnum.get(ord);
            assertEquals(msg, seek, values[ord]);
            if (random.nextBoolean() && ord < valueCount-1) {
              if (random.nextBoolean()) {
                assertEquals(msg, values[ord+1], intsEnum.advance(ord+1));
              } else {
                assertEquals(msg, values[ord+1], intsEnum.next());
              }
            }
          }
          if (intsEnum.ord() < valueCount - 1)
            assertEquals(values[valueCount - 1], intsEnum
                .advance(valueCount - 1));
          assertEquals(valueCount - 1, intsEnum.ord());
          assertEquals(fp, in.getFilePointer());
          in.close();
        }
        ceil *= 2;
        d.close();
      }
    }
  }

  public void testControlledEquality() {
    final int VALUE_COUNT = 255;
    final int BITS_PER_VALUE = 8;