LUCENE-3433: Random access non RAM resident IndexDocValues (CSF)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1179970 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2011-10-07 09:05:10 +00:00
parent 70ee6dbdb6
commit 63b736c033
33 changed files with 1223 additions and 2318 deletions

View File

@ -548,6 +548,10 @@ New features
* LUCENE-2309: Added IndexableField.tokenStream(Analyzer) which is now
responsible for creating the TokenStreams for Fields when they are to
be indexed. (Chris Male)
* LUCENE-3433: Added random access for non RAM resident IndexDocValues. RAM
resident and disk resident IndexDocValues are now exposed via the Source
interface. ValuesEnum has been removed in favour of Source. (Simon Willnauer)
Optimizations

View File

@ -19,7 +19,6 @@ package org.apache.lucene.document;
import java.io.Reader;
import java.util.Comparator;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.values.PerDocFieldValues;
import org.apache.lucene.index.values.ValueType;
@ -317,21 +316,34 @@ public class IndexDocValuesField extends Field implements PerDocFieldValues {
final String value;
switch (type) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
value = "bytes:bytes.utf8ToString();";
case BYTES_FIXED_SORTED:
case BYTES_VAR_SORTED:
// don't decode to a unicode string here - these bytes are not necessarily unicode
value = "bytes: " + bytes.toString();
break;
case FIXED_INTS_16:
value = "int16: " + longValue;
break;
case FIXED_INTS_32:
value = "int32: " + longValue;
break;
case FIXED_INTS_64:
value = "int64: " + longValue;
break;
case FIXED_INTS_8:
value = "int8: " + longValue;
break;
case VAR_INTS:
value = "int:" + longValue;
value = "vint: " + longValue;
break;
case FLOAT_32:
value = "float32:" + doubleValue;
value = "float32: " + doubleValue;
break;
case FLOAT_64:
value = "float64:" + doubleValue;
value = "float64: " + doubleValue;
break;
default:
throw new IllegalArgumentException("unknown type: " + type);
@ -353,14 +365,18 @@ public class IndexDocValuesField extends Field implements PerDocFieldValues {
final IndexDocValuesField valField = new IndexDocValuesField(field.name(), field.fieldType(), field.stringValue());
switch (type) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
case BYTES_FIXED_SORTED:
case BYTES_VAR_SORTED:
BytesRef ref = field.isBinary() ? field.binaryValue() : new BytesRef(field.stringValue());
valField.setBytes(ref, type);
break;
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
case VAR_INTS:
valField.setInt(Long.parseLong(field.stringValue()));
break;

View File

@ -41,7 +41,7 @@ import java.util.Map;
import org.apache.lucene.index.codecs.BlockTreeTermsReader;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.index.values.ValuesEnum;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -1070,27 +1070,28 @@ public class CheckIndex {
if (docValues == null) {
continue;
}
final ValuesEnum values = docValues.getEnum();
while (values.nextDoc() != ValuesEnum.NO_MORE_DOCS) {
final Source values = docValues.getDirectSource();
final int maxDoc = reader.maxDoc();
for (int i = 0; i < maxDoc; i++) {
switch (fieldInfo.docValues) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_VAR_SORTED:
case BYTES_FIXED_DEREF:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
values.bytes();
values.getBytes(i, new BytesRef());
break;
case FLOAT_32:
case FLOAT_64:
values.getFloat();
values.getFloat(i);
break;
case VAR_INTS:
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
values.getInt();
values.getInt(i);
break;
default:
throw new IllegalArgumentException("Field: " + fieldInfo.name

View File

@ -652,31 +652,30 @@ public final class FieldInfos implements Iterable<FieldInfo> {
case BYTES_FIXED_DEREF:
b = 5;
break;
case BYTES_FIXED_SORTED:
case BYTES_VAR_STRAIGHT:
b = 6;
break;
case BYTES_VAR_STRAIGHT:
case BYTES_VAR_DEREF:
b = 7;
break;
case BYTES_VAR_DEREF:
case FIXED_INTS_16:
b = 8;
break;
case BYTES_VAR_SORTED:
case FIXED_INTS_32:
b = 9;
break;
case FIXED_INTS_16:
case FIXED_INTS_64:
b = 10;
break;
case FIXED_INTS_32:
case FIXED_INTS_8:
b = 11;
break;
case FIXED_INTS_64:
case BYTES_FIXED_SORTED:
b = 12;
break;
case FIXED_INTS_8:
case BYTES_VAR_SORTED:
b = 13;
break;
default:
throw new IllegalStateException("unhandled indexValues type " + fi.docValues);
}
@ -754,29 +753,29 @@ public final class FieldInfos implements Iterable<FieldInfo> {
docValuesType = ValueType.BYTES_FIXED_DEREF;
break;
case 6:
docValuesType = ValueType.BYTES_FIXED_SORTED;
break;
case 7:
docValuesType = ValueType.BYTES_VAR_STRAIGHT;
break;
case 8:
case 7:
docValuesType = ValueType.BYTES_VAR_DEREF;
break;
case 9:
docValuesType = ValueType.BYTES_VAR_SORTED;
break;
case 10:
case 8:
docValuesType = ValueType.FIXED_INTS_16;
break;
case 11:
case 9:
docValuesType = ValueType.FIXED_INTS_32;
break;
case 12:
case 10:
docValuesType = ValueType.FIXED_INTS_64;
break;
case 13:
case 11:
docValuesType = ValueType.FIXED_INTS_8;
break;
break;
case 12:
docValuesType = ValueType.BYTES_FIXED_SORTED;
break;
case 13:
docValuesType = ValueType.BYTES_VAR_SORTED;
break;
default:
throw new IllegalStateException("unhandled indexValues type " + b);

View File

@ -58,11 +58,11 @@ public abstract class DocValuesReaderBase extends PerDocValues {
public Collection<String> fields() {
return docValues().keySet();
}
/**
 * Returns the {@link BytesRef} comparator this reader hands to
 * <code>Bytes.getValues</code> when it opens byte[] doc values. This
 * implementation returns {@link BytesRef#getUTF8SortedAsUnicodeComparator()}.
 */
public Comparator<BytesRef> getComparator() throws IOException {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
// Only opens files... doesn't actually load any values
protected TreeMap<String, IndexDocValues> load(FieldInfos fieldInfos,
String segment, int docCount, Directory dir, int codecId, IOContext context)
@ -121,9 +121,9 @@ public abstract class DocValuesReaderBase extends PerDocValues {
case VAR_INTS:
return Ints.getValues(dir, id, docCount, type, context);
case FLOAT_32:
return Floats.getValues(dir, id, docCount, context);
return Floats.getValues(dir, id, docCount, context, type);
case FLOAT_64:
return Floats.getValues(dir, id, docCount, context);
return Floats.getValues(dir, id, docCount, context, type);
case BYTES_FIXED_STRAIGHT:
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount, getComparator(), context);
case BYTES_FIXED_DEREF:

View File

@ -54,7 +54,7 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
@Override
public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
return Writer.create(field.getDocValues(),
docValuesId(segmentName, codecId, field.number),
docValuesId(segmentName, codecId, field.number),
getDirectory(), getComparator(), bytesUsed, context);
}
@ -62,6 +62,7 @@ public abstract class DocValuesWriterBase extends PerDocConsumer {
return segmentsName + "_" + codecID + "-" + fieldId;
}
/**
 * Returns the {@link BytesRef} comparator this consumer passes to
 * <code>Writer.create</code> in {@link #addValuesField(FieldInfo)}. This
 * implementation returns {@link BytesRef#getUTF8SortedAsUnicodeComparator()}.
 */
public Comparator<BytesRef> getComparator() throws IOException {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}

View File

@ -56,9 +56,9 @@ public class SepDocValuesConsumer extends DocValuesWriterBase {
switch (fieldInfo.getDocValues()) {
case BYTES_FIXED_DEREF:
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_FIXED_SORTED:
case BYTES_VAR_STRAIGHT:
case BYTES_FIXED_SORTED:
case BYTES_VAR_SORTED:
files.add(IndexFileNames.segmentFileName(filename, "",
Writer.INDEX_EXTENSION));
assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
@ -77,7 +77,6 @@ public class SepDocValuesConsumer extends DocValuesWriterBase {
assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
Writer.DATA_EXTENSION));
break;
default:
assert false;
}

View File

@ -26,14 +26,12 @@ import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.values.IndexDocValues.SortedSource;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
@ -50,8 +48,7 @@ import org.apache.lucene.util.packed.PackedInts;
/**
* Provides concrete Writer/Reader implementations for <tt>byte[]</tt> value per
* document. There are 6 package-private default implementations of this, for
* all combinations of {@link Mode#DEREF}/{@link Mode#STRAIGHT}/
* {@link Mode#SORTED} x fixed-length/variable-length.
* all combinations of {@link Mode#DEREF}/{@link Mode#STRAIGHT} x fixed-length/variable-length.
*
* <p>
* NOTE: Currently the total amount of byte[] data stored (across a single
@ -101,11 +98,12 @@ public final class Bytes {
* the segment name and a unique id per segment.
* @param mode
* the writers store mode
* @param comp
* a {@link BytesRef} comparator - only used with {@link Mode#SORTED}
* @param fixedSize
* <code>true</code> if all bytes subsequently passed to the
* {@link Writer} will have the same length
* @param sortComparator {@link BytesRef} comparator used by sorted variants.
* If <code>null</code> {@link BytesRef#getUTF8SortedAsUnicodeComparator()}
* is used instead
* @param bytesUsed
* an {@link AtomicLong} instance to track the used bytes within the
* {@link Writer}. A call to {@link Writer#finish(int)} will release
@ -117,12 +115,12 @@ public final class Bytes {
* if the files for the writer can not be created.
*/
public static Writer getWriter(Directory dir, String id, Mode mode,
Comparator<BytesRef> comp, boolean fixedSize, Counter bytesUsed, IOContext context)
boolean fixedSize, Comparator<BytesRef> sortComparator, Counter bytesUsed, IOContext context)
throws IOException {
// TODO -- i shouldn't have to specify fixed? can
// track itself & do the write thing at write time?
if (comp == null) {
comp = BytesRef.getUTF8SortedAsUnicodeComparator();
if (sortComparator == null) {
sortComparator = BytesRef.getUTF8SortedAsUnicodeComparator();
}
if (fixedSize) {
@ -131,7 +129,7 @@ public final class Bytes {
} else if (mode == Mode.DEREF) {
return new FixedDerefBytesImpl.Writer(dir, id, bytesUsed, context);
} else if (mode == Mode.SORTED) {
return new FixedSortedBytesImpl.Writer(dir, id, comp, bytesUsed, context);
return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context);
}
} else {
if (mode == Mode.STRAIGHT) {
@ -139,7 +137,7 @@ public final class Bytes {
} else if (mode == Mode.DEREF) {
return new VarDerefBytesImpl.Writer(dir, id, bytesUsed, context);
} else if (mode == Mode.SORTED) {
return new VarSortedBytesImpl.Writer(dir, id, comp, bytesUsed, context);
return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context);
}
}
@ -163,30 +161,34 @@ public final class Bytes {
* otherwise <code>false</code>
* @param maxDoc
* the number of document values stored for the given ID
* @param sortComparator byte comparator used by sorted variants
* @param sortComparator {@link BytesRef} comparator used by sorted variants.
* If <code>null</code> {@link BytesRef#getUTF8SortedAsUnicodeComparator()}
* is used instead
* @return an initialized {@link IndexDocValues} instance.
* @throws IOException
* if an {@link IOException} occurs
*/
public static IndexDocValues getValues(Directory dir, String id, Mode mode,
boolean fixedSize, int maxDoc, Comparator<BytesRef> sortComparator, IOContext context) throws IOException {
if (sortComparator == null) {
sortComparator = BytesRef.getUTF8SortedAsUnicodeComparator();
}
// TODO -- I can peek @ header to determine fixed/mode?
if (fixedSize) {
if (mode == Mode.STRAIGHT) {
return new FixedStraightBytesImpl.Reader(dir, id, maxDoc, context);
return new FixedStraightBytesImpl.FixedStraightReader(dir, id, maxDoc, context);
} else if (mode == Mode.DEREF) {
return new FixedDerefBytesImpl.Reader(dir, id, maxDoc, context);
return new FixedDerefBytesImpl.FixedDerefReader(dir, id, maxDoc, context);
} else if (mode == Mode.SORTED) {
return new FixedSortedBytesImpl.Reader(dir, id, maxDoc, context);
return new FixedSortedBytesImpl.Reader(dir, id, maxDoc, context, ValueType.BYTES_FIXED_SORTED, sortComparator);
}
} else {
if (mode == Mode.STRAIGHT) {
return new VarStraightBytesImpl.Reader(dir, id, maxDoc, context);
return new VarStraightBytesImpl.VarStraightReader(dir, id, maxDoc, context);
} else if (mode == Mode.DEREF) {
return new VarDerefBytesImpl.Reader(dir, id, maxDoc, context);
return new VarDerefBytesImpl.VarDerefReader(dir, id, maxDoc, context);
} else if (mode == Mode.SORTED) {
return new VarSortedBytesImpl.Reader(dir, id, maxDoc, sortComparator, context);
return new VarSortedBytesImpl.Reader(dir, id, maxDoc,context, ValueType.BYTES_VAR_SORTED, sortComparator);
}
}
@ -196,7 +198,6 @@ public final class Bytes {
// TODO open up this API?
static abstract class BytesSourceBase extends Source {
private final PagedBytes pagedBytes;
private final ValueType type;
protected final IndexInput datIn;
protected final IndexInput idxIn;
protected final static int PAGED_BYTES_BITS = 15;
@ -206,6 +207,7 @@ public final class Bytes {
protected BytesSourceBase(IndexInput datIn, IndexInput idxIn,
PagedBytes pagedBytes, long bytesToRead, ValueType type) throws IOException {
super(type);
assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: "
+ (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
this.datIn = datIn;
@ -214,192 +216,15 @@ public final class Bytes {
this.pagedBytes.copy(datIn, bytesToRead);
data = pagedBytes.freeze(true);
this.idxIn = idxIn;
this.type = type;
}
public void close() throws IOException {
try {
data.close(); // close data
} finally {
try {
if (datIn != null) {
datIn.close();
}
} finally {
if (idxIn != null) {// if straight - no index needed
idxIn.close();
}
}
}
}
@Override
public ValueType type() {
return type;
}
@Override
public int getValueCount() {
throw new UnsupportedOperationException();
}
/**
* Returns one greater than the largest possible document number.
*/
protected abstract int maxDoc();
@Override
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
return new SourceEnum(attrSource, type(), this, maxDoc()) {
@Override
public int advance(int target) throws IOException {
if (target >= numDocs) {
return pos = NO_MORE_DOCS;
}
while (source.getBytes(target, bytesRef).length == 0) {
if (++target >= numDocs) {
return pos = NO_MORE_DOCS;
}
}
return pos = target;
}
};
}
}
static abstract class DerefBytesSourceBase extends BytesSourceBase {
protected final PackedInts.Reader addresses;
public DerefBytesSourceBase(IndexInput datIn, IndexInput idxIn, long bytesToRead, ValueType type) throws IOException {
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), bytesToRead, type);
addresses = PackedInts.getReader(idxIn);
}
@Override
public int getValueCount() {
return addresses.size();
}
@Override
protected int maxDoc() {
return addresses.size();
}
}
static abstract class BytesSortedSourceBase extends SortedSource {
private final PagedBytes pagedBytes;
private final Comparator<BytesRef> comp;
protected final PackedInts.Reader docToOrdIndex;
private final ValueType type;
protected final IndexInput datIn;
protected final IndexInput idxIn;
protected final BytesRef defaultValue = new BytesRef();
protected final static int PAGED_BYTES_BITS = 15;
protected final PagedBytes.Reader data;
protected BytesSortedSourceBase(IndexInput datIn, IndexInput idxIn,
Comparator<BytesRef> comp, long bytesToRead, ValueType type) throws IOException {
this(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), bytesToRead, type);
}
protected BytesSortedSourceBase(IndexInput datIn, IndexInput idxIn,
Comparator<BytesRef> comp, PagedBytes pagedBytes, long bytesToRead,ValueType type)
throws IOException {
assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: "
+ (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
this.datIn = datIn;
this.pagedBytes = pagedBytes;
this.pagedBytes.copy(datIn, bytesToRead);
data = pagedBytes.freeze(true);
this.idxIn = idxIn;
this.comp = comp == null ? BytesRef.getUTF8SortedAsUnicodeComparator()
: comp;
docToOrdIndex = PackedInts.getReader(idxIn);
this.type = type;
}
@Override
public int ord(int docID) {
return (int) docToOrdIndex.get(docID) -1;
}
@Override
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
assert ord >= 0;
return deref(ord, bytesRef);
}
protected void closeIndexInput() throws IOException {
IOUtils.close(datIn, idxIn);
}
/**
* Returns the largest doc id + 1 in this doc values source
*/
public int maxDoc() {
return docToOrdIndex.size();
}
/**
* Copies the value for the given ord to the given {@link BytesRef} and
* returns it.
*/
protected abstract BytesRef deref(int ord, BytesRef bytesRef);
protected int binarySearch(BytesRef b, BytesRef bytesRef, int low,
int high) {
int mid = 0;
while (low <= high) {
mid = (low + high) >>> 1;
deref(mid, bytesRef);
final int cmp = comp.compare(bytesRef, b);
if (cmp < 0) {
low = mid + 1;
} else if (cmp > 0) {
high = mid - 1;
} else {
return mid;
}
}
assert comp.compare(bytesRef, b) != 0;
return -(low + 1);
}
@Override
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
return new SourceEnum(attrSource, type(), this, maxDoc()) {
@Override
public int advance(int target) throws IOException {
if (target >= numDocs) {
return pos = NO_MORE_DOCS;
}
while (source.getBytes(target, bytesRef).length == 0) {
if (++target >= numDocs) {
return pos = NO_MORE_DOCS;
}
}
return pos = target;
}
};
}
@Override
public ValueType type() {
return type;
}
}
// TODO: open up this API?!
static abstract class BytesWriterBase extends Writer {
private final String id;
private IndexOutput idxOut;
private IndexOutput datOut;
protected BytesRef bytesRef;
protected BytesRef bytesRef = new BytesRef();
private final Directory dir;
private final String codecName;
private final int version;
@ -467,8 +292,8 @@ public final class Bytes {
public abstract void finish(int docCount) throws IOException;
@Override
protected void mergeDoc(int docID) throws IOException {
add(docID, bytesRef);
protected void mergeDoc(int docID, int sourceDoc) throws IOException {
add(docID, currentMergeSource.getBytes(sourceDoc, bytesRef));
}
@Override
@ -479,11 +304,6 @@ public final class Bytes {
}
}
@Override
protected void setNextEnum(ValuesEnum valuesEnum) {
bytesRef = valuesEnum.bytes();
}
@Override
public void files(Collection<String> files) throws IOException {
assert datOut != null;
@ -506,30 +326,34 @@ public final class Bytes {
protected final IndexInput datIn;
protected final int version;
protected final String id;
protected final ValueType type;
protected BytesReaderBase(Directory dir, String id, String codecName,
int maxVersion, boolean doIndex, IOContext context) throws IOException {
this.id = id;
datIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
Writer.DATA_EXTENSION), context);
int maxVersion, boolean doIndex, IOContext context, ValueType type) throws IOException {
IndexInput dataIn = null;
IndexInput indexIn = null;
boolean success = false;
try {
version = CodecUtil.checkHeader(datIn, codecName, maxVersion, maxVersion);
if (doIndex) {
idxIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
Writer.INDEX_EXTENSION), context);
final int version2 = CodecUtil.checkHeader(idxIn, codecName,
maxVersion, maxVersion);
assert version == version2;
} else {
idxIn = null;
}
success = true;
dataIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
Writer.DATA_EXTENSION), context);
version = CodecUtil.checkHeader(dataIn, codecName, maxVersion, maxVersion);
if (doIndex) {
indexIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
Writer.INDEX_EXTENSION), context);
final int version2 = CodecUtil.checkHeader(indexIn, codecName,
maxVersion, maxVersion);
assert version == version2;
}
success = true;
} finally {
if (!success) {
closeInternal();
IOUtils.closeWhileHandlingException(dataIn, indexIn);
}
}
datIn = dataIn;
idxIn = indexIn;
this.type = type;
this.id = id;
}
/**
@ -553,23 +377,20 @@ public final class Bytes {
try {
super.close();
} finally {
closeInternal();
IOUtils.close(datIn, idxIn);
}
}
@Override
public ValueType type() {
return type;
}
private void closeInternal() throws IOException {
try {
datIn.close();
} finally {
if (idxIn != null) {
idxIn.close();
}
}
}
}
static abstract class DerefBytesWriterBase extends BytesWriterBase {
protected int size = -1;
protected int lastDocId = -1;
protected int[] docToEntry;
protected final BytesRefHash hash;
@ -608,17 +429,33 @@ public final class Bytes {
return;
}
checkSize(bytes);
fillDefault(docID);
int ord = hash.add(bytes);
if (ord < 0) {
ord = (-ord) - 1;
}
docToEntry[docID] = ord;
lastDocId = docID;
}
protected void fillDefault(int docID) {
if (docID >= docToEntry.length) {
final int size = docToEntry.length;
docToEntry = ArrayUtil.grow(docToEntry, 1 + docID);
bytesUsed.addAndGet((docToEntry.length - size)
* RamUsageEstimator.NUM_BYTES_INT);
}
docToEntry[docID] = 1 + ord;
assert size >= 0;
BytesRef ref = new BytesRef(size);
ref.length = size;
int ord = hash.add(ref);
if (ord < 0) {
ord = (-ord) - 1;
}
for (int i = lastDocId+1; i < docID; i++) {
docToEntry[i] = ord;
}
}
protected void checkSize(BytesRef bytes) {
@ -713,77 +550,50 @@ public final class Bytes {
}
abstract static class DerefBytesEnumBase extends ValuesEnum {
private final PackedInts.ReaderIterator idx;
private final int valueCount;
private int pos = -1;
static abstract class BytesSortedSourceBase extends SortedSource {
private final PagedBytes pagedBytes;
protected final PackedInts.Reader docToOrdIndex;
protected final IndexInput datIn;
protected final long fp;
protected final int size;
protected final IndexInput idxIn;
protected final BytesRef defaultValue = new BytesRef();
protected final static int PAGED_BYTES_BITS = 15;
protected final PagedBytes.Reader data;
protected DerefBytesEnumBase(AttributeSource source, IndexInput datIn,
IndexInput idxIn, int size, ValueType enumType) throws IOException {
super(source, enumType);
protected BytesSortedSourceBase(IndexInput datIn, IndexInput idxIn,
Comparator<BytesRef> comp, long bytesToRead, ValueType type) throws IOException {
this(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), bytesToRead, type);
}
protected BytesSortedSourceBase(IndexInput datIn, IndexInput idxIn,
Comparator<BytesRef> comp, PagedBytes pagedBytes, long bytesToRead,ValueType type)
throws IOException {
super(type, comp);
assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: "
+ (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
this.datIn = datIn;
this.size = size;
idx = PackedInts.getReaderIterator(idxIn);
fp = datIn.getFilePointer();
if (size > 0) {
bytesRef.grow(this.size);
bytesRef.length = this.size;
}
bytesRef.offset = 0;
valueCount = idx.size();
}
this.pagedBytes = pagedBytes;
this.pagedBytes.copy(datIn, bytesToRead);
data = pagedBytes.freeze(true);
this.idxIn = idxIn;
docToOrdIndex = PackedInts.getReader(idxIn);
protected void copyFrom(ValuesEnum valuesEnum) {
bytesRef = valuesEnum.bytesRef;
if (bytesRef.bytes.length < size) {
bytesRef.grow(size);
}
bytesRef.length = size;
bytesRef.offset = 0;
}
@Override
public int advance(int target) throws IOException {
if (target < valueCount) {
long address;
while ((address = idx.advance(target)) == 0) {
if (++target >= valueCount) {
return pos = NO_MORE_DOCS;
}
}
pos = idx.ord();
fill(address, bytesRef);
return pos;
}
return pos = NO_MORE_DOCS;
public int ord(int docID) {
return (int) docToOrdIndex.get(docID);
}
@Override
public int nextDoc() throws IOException {
if (pos >= valueCount) {
return pos = NO_MORE_DOCS;
}
return advance(pos + 1);
protected void closeIndexInput() throws IOException {
IOUtils.close(datIn, idxIn);
}
public void close() throws IOException {
try {
datIn.close();
} finally {
idx.close();
}
/**
* Returns the largest doc id + 1 in this doc values source
*/
public int maxDoc() {
return docToOrdIndex.size();
}
protected abstract void fill(long address, BytesRef ref) throws IOException;
@Override
public int docID() {
return pos;
}
}
}
}

View File

@ -0,0 +1,137 @@
package org.apache.lucene.index.values;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
/**
 * Base class for disk resident source implementations.
 * <p>
 * Nothing is loaded up front: every lookup first calls
 * {@link #position(int)} so the subclass can move the underlying
 * {@link IndexInput} to the requested document's value, and then decodes the
 * value directly from that input. Any {@link IOException} raised while
 * positioning or reading is rethrown as an {@link IllegalStateException}
 * carrying the original exception as its cause.
 *
 * @lucene.internal
 */
abstract class DirectSource extends Source {
  /** Input that values are read from on each lookup. */
  protected final IndexInput data;
  /** Decoder selected once in the constructor from the {@link ValueType}. */
  private final ToNumeric toNumeric;
  /** File pointer of {@link #data} at the time this source was created. */
  protected final long baseOffset;

  /**
   * Creates a new source reading from the given input.
   *
   * @param input
   *          the input to read values from; its current file pointer is
   *          recorded as {@link #baseOffset}
   * @param type
   *          the value type, used to pick how numeric values are decoded
   */
  DirectSource(IndexInput input, ValueType type) {
    super(type);
    this.data = input;
    baseOffset = input.getFilePointer();
    switch (type) {
    case FIXED_INTS_16:
      toNumeric = new ShortToLong();
      break;
    case FLOAT_32: // 4 bytes as well; IntToLong#toDouble decodes the float bits
    case FIXED_INTS_32:
      toNumeric = new IntToLong();
      break;
    case FIXED_INTS_8:
      toNumeric = new ByteToLong();
      break;
    default:
      // every remaining type is decoded by reading a long
      toNumeric = new LongToLong();
    }
  }

  /**
   * Positions the input for the given document and reads as many bytes as
   * {@link #position(int)} reported into <code>ref</code>. The passed
   * {@link BytesRef} is grown if needed, its offset is reset to 0 and its
   * length is set to the number of bytes read.
   *
   * @return the passed in <code>ref</code>
   * @throws IllegalStateException
   *           if positioning or reading fails with an {@link IOException}
   */
  @Override
  public BytesRef getBytes(int docID, BytesRef ref) {
    try {
      final int sizeToRead = position(docID);
      ref.grow(sizeToRead);
      data.readBytes(ref.bytes, 0, sizeToRead);
      ref.length = sizeToRead;
      ref.offset = 0;
      return ref;
    } catch (IOException ex) {
      throw new IllegalStateException("failed to get value for docID: " + docID, ex);
    }
  }

  /**
   * Positions the input for the given document and decodes one integer value
   * with the decoder chosen for this source's type.
   *
   * @throws IllegalStateException
   *           if positioning or reading fails with an {@link IOException}
   */
  @Override
  public long getInt(int docID) {
    try {
      position(docID);
      return toNumeric.toLong(data);
    } catch (IOException ex) {
      throw new IllegalStateException("failed to get value for docID: " + docID, ex);
    }
  }

  /**
   * Positions the input for the given document and decodes one floating point
   * value with the decoder chosen for this source's type.
   *
   * @throws IllegalStateException
   *           if positioning or reading fails with an {@link IOException}
   */
  @Override
  public double getFloat(int docID) {
    try {
      position(docID);
      return toNumeric.toDouble(data);
    } catch (IOException ex) {
      throw new IllegalStateException("failed to get value for docID: " + docID, ex);
    }
  }

  /**
   * Called before every read. {@link #getBytes(int, BytesRef)} reads exactly
   * the returned number of bytes from {@link #data} right after this call;
   * {@link #getInt(int)} and {@link #getFloat(int)} ignore the return value
   * and decode a single number from the current position.
   *
   * @param docID
   *          the document whose value is about to be read
   * @return the number of bytes the value for <code>docID</code> occupies
   */
  protected abstract int position(int docID) throws IOException;

  /** Strategy for decoding a single numeric value from an input. */
  private abstract static class ToNumeric {
    abstract long toLong(IndexInput input) throws IOException;

    /** By default the integer value is simply widened to a double. */
    double toDouble(IndexInput input) throws IOException {
      return toLong(input);
    }
  }

  /** Decodes a value by reading a single byte. */
  private static final class ByteToLong extends ToNumeric {
    @Override
    long toLong(IndexInput input) throws IOException {
      return input.readByte();
    }
  }

  /** Decodes a value by reading a short. */
  private static final class ShortToLong extends ToNumeric {
    @Override
    long toLong(IndexInput input) throws IOException {
      return input.readShort();
    }
  }

  /** Decodes a value by reading an int; doubles are decoded from float bits. */
  private static final class IntToLong extends ToNumeric {
    @Override
    long toLong(IndexInput input) throws IOException {
      return input.readInt();
    }

    @Override
    double toDouble(IndexInput input) throws IOException {
      return Float.intBitsToFloat(input.readInt());
    }
  }

  /** Decodes a value by reading a long; doubles are decoded from double bits. */
  private static final class LongToLong extends ToNumeric {
    @Override
    long toLong(IndexInput input) throws IOException {
      return input.readLong();
    }

    @Override
    double toDouble(IndexInput input) throws IOException {
      return Double.longBitsToDouble(input.readLong());
    }
  }
}

View File

@ -20,16 +20,17 @@ package org.apache.lucene.index.values;
import java.io.IOException;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
import org.apache.lucene.index.values.Bytes.DerefBytesSourceBase;
import org.apache.lucene.index.values.Bytes.DerefBytesEnumBase;
import org.apache.lucene.index.values.Bytes.BytesSourceBase;
import org.apache.lucene.index.values.Bytes.DerefBytesWriterBase;
import org.apache.lucene.index.values.DirectSource;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.PackedInts;
// Stores fixed-length byte[] by deref, ie when two docs
// have the same value, they store only 1 byte[]
@ -66,63 +67,61 @@ class FixedDerefBytesImpl {
}
}
public static class Reader extends BytesReaderBase {
public static class FixedDerefReader extends BytesReaderBase {
private final int size;
private final int numValuesStored;
Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_START, true, context);
FixedDerefReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_START, true, context, ValueType.BYTES_FIXED_DEREF);
size = datIn.readInt();
numValuesStored = idxIn.readInt();
}
@Override
public Source load() throws IOException {
return new Source(cloneData(), cloneIndex(), size, numValuesStored);
}
private static final class Source extends DerefBytesSourceBase {
private final int size;
protected Source(IndexInput datIn, IndexInput idxIn, int size, long numValues) throws IOException {
super(datIn, idxIn, size * numValues, ValueType.BYTES_FIXED_DEREF);
this.size = size;
}
@Override
public BytesRef getBytes(int docID, BytesRef bytesRef) {
final int id = (int) addresses.get(docID);
if (id == 0) {
bytesRef.length = 0;
return bytesRef;
}
return data.fillSlice(bytesRef, ((id - 1) * size), size);
}
return new FixedDerefSource(cloneData(), cloneIndex(), size, numValuesStored);
}
@Override
public ValuesEnum getEnum(AttributeSource source) throws IOException {
return new DerefBytesEnum(source, cloneData(), cloneIndex(), size);
public Source getDirectSource()
throws IOException {
return new DirectFixedDerefSource(cloneData(), cloneIndex(), size, type());
}
}
static final class FixedDerefSource extends BytesSourceBase {
private final int size;
private final PackedInts.Reader addresses;
final static class DerefBytesEnum extends DerefBytesEnumBase {
public DerefBytesEnum(AttributeSource source, IndexInput datIn,
IndexInput idxIn, int size) throws IOException {
super(source, datIn, idxIn, size, ValueType.BYTES_FIXED_DEREF);
}
protected void fill(long address, BytesRef ref) throws IOException {
datIn.seek(fp + ((address - 1) * size));
datIn.readBytes(ref.bytes, 0, size);
ref.length = size;
ref.offset = 0;
}
protected FixedDerefSource(IndexInput datIn, IndexInput idxIn, int size, long numValues) throws IOException {
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), size * numValues,
ValueType.BYTES_FIXED_DEREF);
this.size = size;
addresses = PackedInts.getReader(idxIn);
}
@Override
public ValueType type() {
return ValueType.BYTES_FIXED_DEREF;
public BytesRef getBytes(int docID, BytesRef bytesRef) {
final int id = (int) addresses.get(docID);
return data.fillSlice(bytesRef, (id * size), size);
}
}
/**
 * Disk resident source for fixed-length deref bytes: each lookup reads the
 * document's entry from the packed index and seeks the data input to
 * <code>baseOffset + entry * size</code>.
 */
final static class DirectFixedDerefSource extends DirectSource {
// per-document index into the fixed-size value slots, read on demand
private final PackedInts.RandomAccessReaderIterator index;
// length in bytes of every stored value
private final int size;
DirectFixedDerefSource(IndexInput data, IndexInput index, int size, ValueType type)
throws IOException {
super(data, type);
this.size = size;
this.index = PackedInts.getRandomAccessReaderIterator(index);
}
@Override
protected int position(int docID) throws IOException {
// move to the start of the value slot referenced by this document
data.seek(baseOffset + index.get(docID) * size);
// all values share the same length
return size;
}
}

View File

@ -23,14 +23,14 @@ import java.util.Comparator;
import org.apache.lucene.index.values.Bytes.BytesSortedSourceBase;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
import org.apache.lucene.index.values.Bytes.DerefBytesWriterBase;
import org.apache.lucene.index.values.FixedDerefBytesImpl.Reader.DerefBytesEnum;
import org.apache.lucene.index.values.IndexDocValues.SortedSource;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.packed.PackedInts;
// Stores fixed-length byte[] by deref, ie when two docs
// have the same value, they store only 1 byte[]
@ -44,7 +44,7 @@ class FixedSortedBytesImpl {
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
static class Writer extends DerefBytesWriterBase {
static final class Writer extends DerefBytesWriterBase {
private final Comparator<BytesRef> comp;
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
@ -57,9 +57,10 @@ class FixedSortedBytesImpl {
// some last docs that we didn't see
@Override
public void finishInternal(int docCount) throws IOException {
fillDefault(docCount);
final IndexOutput datOut = getOrCreateDataOut();
final int count = hash.size();
final int[] address = new int[count+1]; // addr 0 is default values
final int[] address = new int[count]; // addr 0 is default values
datOut.writeInt(size);
if (size != -1) {
final int[] sortedEntries = hash.sort(comp);
@ -70,7 +71,7 @@ class FixedSortedBytesImpl {
final BytesRef bytes = hash.get(e, bytesRef);
assert bytes.length == size;
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
address[e + 1] = 1 + i;
address[e] = i;
}
}
final IndexOutput idxOut = getOrCreateIndexOut();
@ -79,65 +80,101 @@ class FixedSortedBytesImpl {
}
}
public static class Reader extends BytesReaderBase {
static final class Reader extends BytesReaderBase {
private final int size;
private final int numValuesStored;
private final int valueCount;
private final Comparator<BytesRef> comparator;
public Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_START, true, context);
public Reader(Directory dir, String id, int maxDoc, IOContext context,
ValueType type, Comparator<BytesRef> comparator) throws IOException {
super(dir, id, CODEC_NAME, VERSION_START, true, context, type);
size = datIn.readInt();
numValuesStored = idxIn.readInt();
valueCount = idxIn.readInt();
this.comparator = comparator;
}
@Override
public org.apache.lucene.index.values.IndexDocValues.Source load()
public Source load() throws IOException {
return new FixedSortedSource(cloneData(), cloneIndex(), size,
valueCount, comparator);
}
@Override
public Source getDirectSource() throws IOException {
return new DirectFixedSortedSource(cloneData(), cloneIndex(), size,
valueCount, comparator, type);
}
}
/**
 * Sorted source for fixed-length byte values. Values are addressed by
 * ordinal: the value for ordinal {@code ord} starts at {@code ord * size}
 * in the loaded data.
 */
static final class FixedSortedSource extends BytesSortedSourceBase {
  /** Number of unique values. */
  private final int valueCount;
  /** Width in bytes of every value. */
  private final int size;

  FixedSortedSource(IndexInput datIn, IndexInput idxIn, int size,
      int numValues, Comparator<BytesRef> comp) throws IOException {
    // Compute the byte count in 64 bits; size * numValues as int silently
    // wraps once the value data exceeds 2GB.
    super(datIn, idxIn, comp, (long) size * numValues, ValueType.BYTES_FIXED_SORTED);
    this.size = size;
    this.valueCount = numValues;
    closeIndexInput();
  }

  @Override
  public int getValueCount() {
    return valueCount;
  }

  @Override
  public BytesRef getByOrd(int ord, BytesRef bytesRef) {
    // Same widening as in the constructor so large ordinals address the
    // correct slice instead of a wrapped (possibly negative) offset.
    return data.fillSlice(bytesRef, (long) ord * size, size);
  }
}
static final class DirectFixedSortedSource extends SortedSource {
final PackedInts.RandomAccessReaderIterator docToOrdIndex;
private final IndexInput datIn;
private final long basePointer;
private final int size;
private final int valueCount;
DirectFixedSortedSource(IndexInput datIn, IndexInput idxIn, int size,
int valueCount, Comparator<BytesRef> comp, ValueType type)
throws IOException {
return loadSorted(null);
super(type, comp);
docToOrdIndex = PackedInts.getRandomAccessReaderIterator(idxIn);
basePointer = datIn.getFilePointer();
this.datIn = datIn;
this.size = size;
this.valueCount = valueCount;
}
@Override
public SortedSource loadSorted(Comparator<BytesRef> comp)
throws IOException {
return new Source(cloneData(), cloneIndex(), size, numValuesStored, comp);
}
private static class Source extends BytesSortedSourceBase {
private final int valueCount;
private final int size;
public Source(IndexInput datIn, IndexInput idxIn, int size,
int numValues, Comparator<BytesRef> comp) throws IOException {
super(datIn, idxIn, comp, size * numValues, ValueType.BYTES_FIXED_SORTED);
this.size = size;
this.valueCount = numValues;
closeIndexInput();
}
@Override
public int getByValue(BytesRef bytes, BytesRef tmpRef) {
return binarySearch(bytes, tmpRef, 0, valueCount - 1);
}
@Override
public int getValueCount() {
return valueCount;
}
@Override
protected BytesRef deref(int ord, BytesRef bytesRef) {
return data.fillSlice(bytesRef, (ord * size), size);
public int ord(int docID) {
  // Read the doc's entry from the doc-to-ord index; the interface does not
  // allow IOException here, so it is rethrown unchecked with its cause.
  final long packed;
  try {
    packed = docToOrdIndex.get(docID);
  } catch (IOException ioe) {
    throw new IllegalStateException("failed to get ord", ioe);
  }
  return (int) packed;
}
@Override
public ValuesEnum getEnum(AttributeSource source) throws IOException {
// do unsorted
return new DerefBytesEnum(source, cloneData(), cloneIndex(), size);
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
  try {
    // Widen before multiplying: size * ord is otherwise evaluated in 32-bit
    // arithmetic and wraps around before being added to the long base
    // pointer, seeking to a wrong position once the data exceeds 2GB.
    datIn.seek(basePointer + (long) size * ord);
    // Make sure the caller's buffer can hold one fixed-size value.
    if (bytesRef.bytes.length < size) {
      bytesRef.grow(size);
    }
    datIn.readBytes(bytesRef.bytes, 0, size);
    bytesRef.length = size;
    bytesRef.offset = 0;
    return bytesRef;
  } catch (IOException ex) {
    // The method signature has no throws clause; keep the cause attached.
    throw new IllegalStateException("failed to getByOrd", ex);
  }
}
@Override
public ValueType type() {
return ValueType.BYTES_FIXED_SORTED;
public int getValueCount() {
return valueCount;
}
}
}

View File

@ -24,11 +24,12 @@ import java.io.IOException;
import org.apache.lucene.index.values.Bytes.BytesSourceBase;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
import org.apache.lucene.index.values.DirectSource;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
import org.apache.lucene.util.BytesRef;
@ -137,8 +138,8 @@ class FixedStraightBytesImpl {
datOut = getOrCreateDataOut();
boolean success = false;
try {
if (state.liveDocs == null && state.reader instanceof Reader ) {
Reader reader = (Reader) state.reader;
if (state.liveDocs == null && state.reader instanceof FixedStraightReader ) {
FixedStraightReader reader = (FixedStraightReader) state.reader;
final int maxDocs = reader.maxDoc;
if (maxDocs == 0) {
return;
@ -175,8 +176,9 @@ class FixedStraightBytesImpl {
}
@Override
protected void mergeDoc(int docID) throws IOException {
protected void mergeDoc(int docID, int sourceDoc) throws IOException {
assert lastDocID < docID;
currentMergeSource.getBytes(sourceDoc, bytesRef);
if (size == -1) {
size = bytesRef.length;
datOut.writeInt(size);
@ -236,16 +238,16 @@ class FixedStraightBytesImpl {
}
public static class Reader extends BytesReaderBase {
public static class FixedStraightReader extends BytesReaderBase {
protected final int size;
protected final int maxDoc;
Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
this(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context);
FixedStraightReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
this(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context, ValueType.BYTES_FIXED_STRAIGHT);
}
protected Reader(Directory dir, String id, String codec, int version, int maxDoc, IOContext context) throws IOException {
super(dir, id, codec, version, false, context);
protected FixedStraightReader(Directory dir, String id, String codec, int version, int maxDoc, IOContext context, ValueType type) throws IOException {
super(dir, id, codec, version, false, context, type);
size = datIn.readInt();
this.maxDoc = maxDoc;
}
@ -253,155 +255,83 @@ class FixedStraightBytesImpl {
@Override
public Source load() throws IOException {
return size == 1 ? new SingleByteSource(cloneData(), maxDoc) :
new StraightBytesSource(cloneData(), size, maxDoc);
new FixedStraightSource(cloneData(), size, maxDoc, type);
}
@Override
public void close() throws IOException {
datIn.close();
}
// specialized version for single bytes
private static class SingleByteSource extends Source {
private final int maxDoc;
private final byte[] data;
public SingleByteSource(IndexInput datIn, int maxDoc) throws IOException {
this.maxDoc = maxDoc;
try {
data = new byte[maxDoc];
datIn.readBytes(data, 0, data.length, false);
} finally {
IOUtils.close(datIn);
}
}
@Override
public BytesRef getBytes(int docID, BytesRef bytesRef) {
bytesRef.length = 1;
bytesRef.bytes = data;
bytesRef.offset = docID;
return bytesRef;
}
@Override
public ValueType type() {
return ValueType.BYTES_FIXED_STRAIGHT;
}
@Override
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
return new SourceEnum(attrSource, type(), this, maxDoc) {
@Override
public int advance(int target) throws IOException {
if (target >= numDocs) {
return pos = NO_MORE_DOCS;
}
bytesRef.length = 1;
bytesRef.bytes = data;
bytesRef.offset = target;
return pos = target;
}
};
}
}
private final static class StraightBytesSource extends BytesSourceBase {
private final int size;
private final int maxDoc;
public StraightBytesSource(IndexInput datIn, int size, int maxDoc)
throws IOException {
super(datIn, null, new PagedBytes(PAGED_BYTES_BITS), size * maxDoc, ValueType.BYTES_FIXED_STRAIGHT);
this.size = size;
this.maxDoc = maxDoc;
}
@Override
public BytesRef getBytes(int docID, BytesRef bytesRef) {
return data.fillSlice(bytesRef, docID * size, size);
}
@Override
public int getValueCount() {
return maxDoc;
}
@Override
protected int maxDoc() {
return maxDoc;
}
}
@Override
public ValuesEnum getEnum(AttributeSource source) throws IOException {
return new FixedStraightBytesEnum(source, cloneData(), size, maxDoc);
}
@Override
public ValueType type() {
return ValueType.BYTES_FIXED_STRAIGHT;
public Source getDirectSource() throws IOException {
return new DirectFixedStraightSource(cloneData(), size, type());
}
}
static class FixedStraightBytesEnum extends ValuesEnum {
private final IndexInput datIn;
private final int size;
private final int maxDoc;
private int pos = -1;
private final long fp;
// specialized version for single bytes
private static final class SingleByteSource extends Source {
private final byte[] data;
public FixedStraightBytesEnum(AttributeSource source, IndexInput datIn,
int size, int maxDoc) throws IOException {
super(source, ValueType.BYTES_FIXED_STRAIGHT);
this.datIn = datIn;
this.size = size;
this.maxDoc = maxDoc;
bytesRef.grow(size);
bytesRef.length = size;
bytesRef.offset = 0;
fp = datIn.getFilePointer();
}
protected void copyFrom(ValuesEnum valuesEnum) {
super.copyFrom(valuesEnum);
if (bytesRef.bytes.length < size) {
bytesRef.grow(size);
public SingleByteSource(IndexInput datIn, int maxDoc) throws IOException {
super(ValueType.BYTES_FIXED_STRAIGHT);
try {
data = new byte[maxDoc];
datIn.readBytes(data, 0, data.length, false);
} finally {
IOUtils.close(datIn);
}
bytesRef.length = size;
bytesRef.offset = 0;
}
public void close() throws IOException {
datIn.close();
@Override
public boolean hasArray() {
return true;
}
@Override
public int advance(int target) throws IOException {
if (target >= maxDoc || size == 0) {
return pos = NO_MORE_DOCS;
}
if ((target - 1) != pos) // pos inc == 1
datIn.seek(fp + target * size);
datIn.readBytes(bytesRef.bytes, 0, size);
return pos = target;
public Object getArray() {
return data;
}
@Override
public int docID() {
return pos;
}
@Override
public int nextDoc() throws IOException {
if (pos >= maxDoc) {
return pos = NO_MORE_DOCS;
}
return advance(pos + 1);
public BytesRef getBytes(int docID, BytesRef bytesRef) {
  // No copy is made: the returned ref points directly into the backing
  // array, one byte per document at index docID.
  bytesRef.bytes = data;
  bytesRef.offset = docID;
  bytesRef.length = 1;
  return bytesRef;
}
}
/**
 * Source for fixed-length straight bytes: one {@code size}-byte value per
 * document, stored back to back so that the value of {@code docID} starts
 * at {@code docID * size}.
 */
private final static class FixedStraightSource extends BytesSourceBase {
  /** Width in bytes of every value. */
  private final int size;

  public FixedStraightSource(IndexInput datIn, int size, int maxDoc, ValueType type)
      throws IOException {
    // Compute the byte count in 64 bits; size * maxDoc as int silently wraps
    // once a segment holds more than 2GB of values.
    super(datIn, null, new PagedBytes(PAGED_BYTES_BITS), (long) size * maxDoc,
        type);
    this.size = size;
  }

  @Override
  public BytesRef getBytes(int docID, BytesRef bytesRef) {
    // Same widening as in the constructor so high doc IDs address the
    // correct slice instead of a wrapped offset.
    return data.fillSlice(bytesRef, (long) docID * size, size);
  }
}
/**
 * {@link DirectSource} for fixed-length straight bytes: seeks the data
 * input to {@code baseOffset + size * docID} for each lookup.
 */
public final static class DirectFixedStraightSource extends DirectSource {
  /** Width in bytes of every value. */
  private final int size;

  DirectFixedStraightSource(IndexInput input, int size, ValueType type) {
    super(input, type);
    this.size = size;
  }

  /**
   * Seeks the data input to the value of the given document.
   *
   * @return the number of bytes of the value, always {@code size}
   */
  @Override
  protected int position(int docID) throws IOException {
    // Widen before multiplying: size * docID is otherwise evaluated in
    // 32-bit arithmetic and wraps for files larger than 2GB.
    data.seek(baseOffset + (long) size * docID);
    return size;
  }
}
}

View File

@ -22,9 +22,9 @@ import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
/**
* Exposes {@link Writer} and reader ({@link Source}) for 32 bit and 64 bit
@ -37,37 +37,47 @@ import org.apache.lucene.util.Counter;
*/
public class Floats {
public static Writer getWriter(Directory dir, String id, int precisionBytes,
Counter bytesUsed, IOContext context) throws IOException {
if (precisionBytes != 4 && precisionBytes != 8) {
throw new IllegalArgumentException("precisionBytes must be 4 or 8; got "
+ precisionBytes);
}
return new FloatsWriter(dir, id, bytesUsed, context, precisionBytes);
protected static final String CODEC_NAME = "Floats";
protected static final int VERSION_START = 0;
protected static final int VERSION_CURRENT = VERSION_START;
public static Writer getWriter(Directory dir, String id, Counter bytesUsed,
IOContext context, ValueType type) throws IOException {
return new FloatsWriter(dir, id, bytesUsed, context, type);
}
public static IndexDocValues getValues(Directory dir, String id, int maxDoc, IOContext context)
public static IndexDocValues getValues(Directory dir, String id, int maxDoc, IOContext context, ValueType type)
throws IOException {
return new FloatsReader(dir, id, maxDoc, context);
return new FloatsReader(dir, id, maxDoc, context, type);
}
/**
 * Maps a floating point {@link ValueType} to its width in bytes.
 *
 * @return 4 for {@code FLOAT_32}, 8 for {@code FLOAT_64}
 * @throws IllegalStateException if the type is any other value type
 */
private static int typeToSize(ValueType type) {
  final int bytes;
  switch (type) {
  case FLOAT_64:
    bytes = 8;
    break;
  case FLOAT_32:
    bytes = 4;
    break;
  default:
    throw new IllegalStateException("illegal type " + type);
  }
  return bytes;
}
final static class FloatsWriter extends FixedStraightBytesImpl.Writer {
private final int size;
private final IndexDocValuesArray template;
public FloatsWriter(Directory dir, String id, Counter bytesUsed,
IOContext context, int size) throws IOException {
super(dir, id, bytesUsed, context);
IOContext context, ValueType type) throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
size = typeToSize(type);
this.bytesRef = new BytesRef(size);
this.size = size;
bytesRef.length = size;
template = IndexDocValuesArray.TEMPLATES.get(type);
assert template != null;
}
public void add(int docID, double v) throws IOException {
if (size == 8) {
bytesRef.copy(Double.doubleToRawLongBits(v));
} else {
bytesRef.copy(Float.floatToRawIntBits((float)v));
}
template.toBytes(v, bytesRef);
add(docID, bytesRef);
}
@ -76,19 +86,14 @@ public class Floats {
add(docID, docValues.getFloat());
}
}
final static class FloatsReader extends FixedStraightBytesImpl.Reader {
final static class FloatsReader extends FixedStraightBytesImpl.FixedStraightReader {
final IndexDocValuesArray arrayTemplate;
FloatsReader(Directory dir, String id, int maxDoc, IOContext context)
FloatsReader(Directory dir, String id, int maxDoc, IOContext context, ValueType type)
throws IOException {
super(dir, id, maxDoc, context);
super(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context, type);
arrayTemplate = IndexDocValuesArray.TEMPLATES.get(type);
assert size == 4 || size == 8;
if (size == 4) {
arrayTemplate = new IndexDocValuesArray.FloatValues();
} else {
arrayTemplate = new IndexDocValuesArray.DoubleValues();
}
}
@Override
@ -97,19 +102,10 @@ public class Floats {
try {
return arrayTemplate.newFromInput(indexInput, maxDoc);
} finally {
indexInput.close();
IOUtils.close(indexInput);
}
}
public ValuesEnum getEnum(AttributeSource source) throws IOException {
IndexInput indexInput = (IndexInput) datIn.clone();
return arrayTemplate.getDirectEnum(source, indexInput, maxDoc);
}
@Override
public ValueType type() {
return arrayTemplate.type();
}
}
}

View File

@ -26,7 +26,6 @@ import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
/**
@ -34,8 +33,8 @@ import org.apache.lucene.util.BytesRef;
* value access based on the lucene internal document id. {@link IndexDocValues}
* exposes two distinct APIs:
* <ul>
* <li>via {@link Source} an entirely RAM resident API for random access</li>
* <li>via {@link ValuesEnum} a disk resident API for sequential access</li>
* <li>via {@link #getSource()} providing RAM resident random access</li>
* <li>via {@link #getDirectSource()} providing on disk random access</li>
* </ul> {@link IndexDocValues} are exposed via
* {@link IndexReader#perDocValues()} on a per-segment basis. For best
* performance {@link IndexDocValues} should be consumed per-segment just like
@ -52,47 +51,18 @@ import org.apache.lucene.util.BytesRef;
* @lucene.experimental
*/
public abstract class IndexDocValues implements Closeable {
/*
* TODO: it might be useful to add another Random Access enum for some
* implementations like packed ints and only return such a random access enum
* if the impl supports random access. For super large segments it might be
* useful or even required in certain environements to have disc based random
* access
*/
public static final IndexDocValues[] EMPTY_ARRAY = new IndexDocValues[0];
private SourceCache cache = new SourceCache.DirectSourceCache();
/**
* Returns an iterator that steps through all documents values for this
* {@link IndexDocValues} field instance. {@link ValuesEnum} will skip document
* without a value if applicable.
*/
public ValuesEnum getEnum() throws IOException {
return getEnum(null);
}
/**
* Returns an iterator that steps through all documents values for this
* {@link IndexDocValues} field instance. {@link ValuesEnum} will skip document
* without a value if applicable.
* <p>
* If an {@link AttributeSource} is supplied to this method the
* {@link ValuesEnum} will use the given source to access implementation
* related attributes.
*/
public abstract ValuesEnum getEnum(AttributeSource attrSource)
throws IOException;
private volatile SourceCache cache = new SourceCache.DirectSourceCache();
private final Object cacheLock = new Object();
/**
* Loads a new {@link Source} instance for this {@link IndexDocValues} field
* instance. Source instances returned from this method are not cached. It is
* the callers responsibility to maintain the instance and release its
* resources once the source is not needed anymore.
* <p>
* This method will return null iff this {@link IndexDocValues} represent a
* {@link SortedSource}.
* <p>
* For managed {@link Source} instances see {@link #getSource()}.
*
* @see #getSource()
@ -111,62 +81,17 @@ public abstract class IndexDocValues implements Closeable {
* from the cache once this {@link IndexDocValues} instance is closed by the
* {@link IndexReader}, {@link Fields} or {@link FieldsEnum} the
* {@link IndexDocValues} was created from.
* <p>
* This method will return null iff this {@link IndexDocValues} represent a
* {@link SortedSource}.
*/
public Source getSource() throws IOException {
return cache.load(this);
}
/**
* Returns a {@link SortedSource} instance for this {@link IndexDocValues} field
* instance like {@link #getSource()}.
* <p>
* This method will return null iff this {@link IndexDocValues} represent a
* {@link Source} instead of a {@link SortedSource}.
* Returns a disk resident {@link Source} instance. Direct Sources are not
* cached in the {@link SourceCache} and should not be shared between threads.
*/
public SortedSource getSortedSorted(Comparator<BytesRef> comparator)
throws IOException {
return cache.loadSorted(this, comparator);
}
/**
* Returns a {@link SortedSource} instance using a default {@link BytesRef}
* comparator for this {@link IndexDocValues} field instance like
* {@link #getSource()}.
* <p>
* This method will return null iff this {@link IndexDocValues} represent a
* {@link Source} instead of a {@link SortedSource}.
*/
public SortedSource getSortedSorted() throws IOException {
return getSortedSorted(null);
}
public abstract Source getDirectSource() throws IOException;
/**
* Loads and returns a {@link SortedSource} instance for this
* {@link IndexDocValues} field instance like {@link #load()}.
* <p>
* This method will return null iff this {@link IndexDocValues} represent a
* {@link Source} instead of a {@link SortedSource}.
*/
public SortedSource loadSorted(Comparator<BytesRef> comparator)
throws IOException {
throw new UnsupportedOperationException();
}
/**
* Loads and returns a {@link SortedSource} instance using a default
* {@link BytesRef} comparator for this {@link IndexDocValues} field instance
* like {@link #load()}.
* <p>
* This method will return null iff this {@link IndexDocValues} represent a
* {@link Source} instead of a {@link SortedSource}.
*/
public SortedSource loadSorted() throws IOException {
return loadSorted(null);
}
/**
* Returns the {@link ValueType} of this {@link IndexDocValues} instance
*/
@ -183,13 +108,10 @@ public abstract class IndexDocValues implements Closeable {
/**
* Sets the {@link SourceCache} used by this {@link IndexDocValues} instance. This
* method should be called before {@link #load()} or
* {@link #loadSorted(Comparator)} is called. All {@link Source} or
* {@link SortedSource} instances in the currently used cache will be closed
* method should be called before {@link #load()} is called. All {@link Source} instances in the currently used cache will be closed
* before the new cache is installed.
* <p>
* Note: All instances previously obtained from {@link #load()} or
* {@link #loadSorted(Comparator)} will be closed.
* Note: All instances previously obtained from {@link #load()} will be lost.
*
* @throws IllegalArgumentException
* if the given cache is <code>null</code>
@ -198,9 +120,10 @@ public abstract class IndexDocValues implements Closeable {
public void setCache(SourceCache cache) {
if (cache == null)
throw new IllegalArgumentException("cache must not be null");
synchronized (this.cache) {
this.cache.close(this);
synchronized (cacheLock) {
SourceCache toClose = this.cache;
this.cache = cache;
toClose.close(this);
}
}
@ -208,12 +131,17 @@ public abstract class IndexDocValues implements Closeable {
* Source of per document values like long, double or {@link BytesRef}
* depending on the {@link IndexDocValues} fields {@link ValueType}. Source
* implementations provide random access semantics similar to array lookups
* and typically are entirely memory resident.
* <p>
* {@link Source} defines 3 {@link ValueType} //TODO finish this
* @see IndexDocValues#getSource()
* @see IndexDocValues#getDirectSource()
*/
public static abstract class Source {
protected final ValueType type;
protected Source(ValueType type) {
this.type = type;
}
/**
* Returns a <tt>long</tt> for the given document id or throws an
* {@link UnsupportedOperationException} if this source doesn't support
@ -242,6 +170,7 @@ public abstract class IndexDocValues implements Closeable {
* Returns a {@link BytesRef} for the given document id or throws an
* {@link UnsupportedOperationException} if this source doesn't support
* <tt>byte[]</tt> values.
* @throws IOException
*
* @throws UnsupportedOperationException
* if this source doesn't support <tt>byte[]</tt> values.
@ -250,35 +179,15 @@ public abstract class IndexDocValues implements Closeable {
throw new UnsupportedOperationException("bytes are not supported");
}
/**
* Returns number of unique values. Some implementations may throw
* UnsupportedOperationException.
*/
public int getValueCount() {
throw new UnsupportedOperationException();
}
/**
* Returns a {@link ValuesEnum} for this source.
*/
public ValuesEnum getEnum() throws IOException {
return getEnum(null);
}
/**
* Returns the {@link ValueType} of this source.
*
* @return the {@link ValueType} of this source.
*/
public abstract ValueType type();
public ValueType type() {
return type;
}
/**
* Returns a {@link ValuesEnum} for this source which uses the given
* {@link AttributeSource}.
*/
public abstract ValuesEnum getEnum(AttributeSource attrSource)
throws IOException;
/**
* Returns <code>true</code> iff this {@link Source} exposes an array via
* {@link #getArray()} otherwise <code>false</code>.
@ -297,61 +206,29 @@ public abstract class IndexDocValues implements Closeable {
public Object getArray() {
return null;
}
}
/**
* {@link ValuesEnum} utility for {@link Source} implemenations.
*
*/
public abstract static class SourceEnum extends ValuesEnum {
protected final Source source;
protected final int numDocs;
protected int pos = -1;
/**
* Creates a new {@link SourceEnum}
*
* @param attrs
* the {@link AttributeSource} for this enum
* @param type
* the enums {@link ValueType}
* @param source
* the source this enum operates on
* @param numDocs
* the number of documents within the source
* If this {@link Source} is sorted this method will return an instance of
* {@link SortedSource} otherwise <code>null</code>
*/
protected SourceEnum(AttributeSource attrs, ValueType type, Source source,
int numDocs) {
super(attrs, type);
this.source = source;
this.numDocs = numDocs;
}
@Override
public void close() throws IOException {
}
@Override
public int docID() {
return pos;
}
@Override
public int nextDoc() throws IOException {
if (pos == NO_MORE_DOCS)
return NO_MORE_DOCS;
return advance(pos + 1);
public SortedSource asSortedSource() {
return null;
}
}
/**
* A sorted variant of {@link Source} for <tt>byte[]</tt> values per document.
* <p>
* Note: {@link ValuesEnum} obtained from a {@link SortedSource} will
* enumerate values in document order and not in sorted order.
*/
public static abstract class SortedSource extends Source {
private final Comparator<BytesRef> comparator;
protected SortedSource(ValueType type, Comparator<BytesRef> comparator) {
super(type);
this.comparator = comparator;
}
@Override
public BytesRef getBytes(int docID, BytesRef bytesRef) {
final int ord = ord(docID);
@ -364,8 +241,7 @@ public abstract class IndexDocValues implements Closeable {
}
/**
* Returns ord for specified docID. If this docID had not been added to the
* Writer, the ord is 0. Ord is dense, ie, starts at 0, then increments by 1
* Returns ord for specified docID. Ord is dense, ie, starts at 0, then increments by 1
* for the next (as defined by {@link Comparator}) value.
*/
public abstract int ord(int docID);
@ -373,28 +249,13 @@ public abstract class IndexDocValues implements Closeable {
/** Returns value for specified ord. */
public abstract BytesRef getByOrd(int ord, BytesRef bytesRef);
/**
* Finds the ordinal whose value is greater or equal to the given value.
*
* @return the given values ordinal if found or otherwise
* <code>(-(ord)-1)</code>, defined as the ordinal of the first
* element that is greater than the given value. This guarantees
* that the return value will always be &gt;= 0 if the given value
* is found.
*
*/
public final int getByValue(BytesRef value) {
return getByValue(value, new BytesRef());
}
/**
* Performs a lookup by value.
*
* @param value
* the value to look up
* @param tmpRef
* a temporary {@link BytesRef} instance used to compare internal
* @param spare
* a spare {@link BytesRef} instance used to compare internal
* values to the given value. Must not be <code>null</code>
* @return the given values ordinal if found or otherwise
* <code>(-(ord)-1)</code>, defined as the ordinal of the first
@ -402,6 +263,37 @@ public abstract class IndexDocValues implements Closeable {
* that the return value will always be &gt;= 0 if the given value
* is found.
*/
public abstract int getByValue(BytesRef value, BytesRef tmpRef);
public int getByValue(BytesRef value, BytesRef spare) {
  // Search the full ordinal range [0, getValueCount() - 1].
  final int lastOrd = getValueCount() - 1;
  return binarySearch(value, spare, 0, lastOrd);
}
/**
 * Binary-searches the ordinals in {@code [low, high]} for the given value,
 * comparing with this source's comparator.
 *
 * @param b
 *          the value to look up
 * @param bytesRef
 *          scratch instance that is overwritten via
 *          {@link #getByOrd(int, BytesRef)} with each probed value
 * @param low
 *          lowest ordinal to consider (inclusive)
 * @param high
 *          highest ordinal to consider (inclusive); an empty range
 *          ({@code high < low}) is allowed
 * @return the ordinal of the value if found, otherwise
 *         <code>(-(ord)-1)</code> where <code>ord</code> is the insertion
 *         point
 */
protected int binarySearch(BytesRef b, BytesRef bytesRef, int low,
    int high) {
  while (low <= high) {
    // unsigned shift avoids overflow of (low + high)
    final int mid = (low + high) >>> 1;
    getByOrd(mid, bytesRef);
    final int cmp = comparator.compare(bytesRef, b);
    if (cmp < 0) {
      low = mid + 1;
    } else if (cmp > 0) {
      high = mid - 1;
    } else {
      return mid;
    }
  }
  // No trailing assertion on bytesRef here: when the range is empty the
  // scratch instance was never filled, so comparing it against b could
  // trip spuriously; when the loop ran, the last comparison was non-zero
  // by construction.
  return -(low + 1);
}
@Override
public SortedSource asSortedSource() {
return this;
}
/**
* Returns the number of unique values in this sorted source
*/
public abstract int getValueCount();
}
}

View File

@ -1,12 +1,12 @@
package org.apache.lucene.index.values;
import java.io.IOException;
import java.util.Collections;
import java.util.EnumMap;
import java.util.Map;
import org.apache.lucene.index.values.FixedStraightBytesImpl.FixedStraightBytesEnum;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
@ -32,84 +32,43 @@ import org.apache.lucene.util.RamUsageEstimator;
*/
abstract class IndexDocValuesArray extends Source {
/**
 * Read-only lookup from a fixed-width numeric {@link ValueType} to the
 * array implementation registered for it.
 */
static final Map<ValueType, IndexDocValuesArray> TEMPLATES;
static {
  final Map<ValueType, IndexDocValuesArray> byType =
      new EnumMap<ValueType, IndexDocValuesArray>(ValueType.class);
  // integer types
  byType.put(ValueType.FIXED_INTS_8, new ByteValues());
  byType.put(ValueType.FIXED_INTS_16, new ShortValues());
  byType.put(ValueType.FIXED_INTS_32, new IntValues());
  byType.put(ValueType.FIXED_INTS_64, new LongValues());
  // floating point types
  byType.put(ValueType.FLOAT_32, new FloatValues());
  byType.put(ValueType.FLOAT_64, new DoubleValues());
  TEMPLATES = Collections.unmodifiableMap(byType);
}
protected final int bytesPerValue;
private final ValueType type;
private final boolean isFloat;
protected int maxDocID = -1;
IndexDocValuesArray(int bytesPerValue, ValueType type) {
super(type);
this.bytesPerValue = bytesPerValue;
this.type = type;
switch (type) {
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
isFloat = false;
break;
case FLOAT_32:
case FLOAT_64:
isFloat = true;
break;
default:
throw new IllegalStateException("illegal type: " + type);
}
}
public abstract IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException;
@Override
public final int getValueCount() {
return maxDocID + 1;
}
@Override
public final ValueType type() {
return type;
}
@Override
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
if (isFloat) {
return new SourceEnum(attrSource, type(), this, maxDocID + 1) {
@Override
public int advance(int target) throws IOException {
if (target >= numDocs) {
return pos = NO_MORE_DOCS;
}
floatsRef.floats[intsRef.offset] = IndexDocValuesArray.this
.getFloat(target);
return pos = target;
}
};
} else {
return new SourceEnum(attrSource, type(), this, maxDocID + 1) {
@Override
public int advance(int target) throws IOException {
if (target >= numDocs) {
return pos = NO_MORE_DOCS;
}
intsRef.ints[intsRef.offset] = IndexDocValuesArray.this
.getInt(target);
return pos = target;
}
};
}
}
abstract ValuesEnum getDirectEnum(AttributeSource attrSource,
IndexInput input, int maxDoc) throws IOException;
@Override
public final boolean hasArray() {
return true;
}
/**
 * Serializes <code>value</code> into <code>bytesRef</code> by delegating to
 * <code>BytesRef.copy(long)</code>. The byte, short and int implementations
 * override this to write their narrower width instead.
 * NOTE(review): presumably this writes the full 8 bytes of the long --
 * confirm against BytesRef.copy(long).
 */
void toBytes(long value, BytesRef bytesRef) {
bytesRef.copy(value);
}
/**
 * Serializes a floating point <code>value</code> into <code>bytesRef</code>.
 * The value is first converted to its raw 64-bit pattern (NaN payloads are
 * preserved, not canonicalized) and that long is then copied into the
 * reference.
 */
void toBytes(double value, BytesRef bytesRef) {
  final long rawBits = Double.doubleToRawLongBits(value);
  bytesRef.copy(rawBits);
}
final static class ByteValues extends IndexDocValuesArray {
private final byte[] values;
@ -122,7 +81,6 @@ abstract class IndexDocValuesArray extends Source {
super(1, ValueType.FIXED_INTS_8);
values = new byte[numDocs];
input.readBytes(values, 0, values.length, false);
maxDocID = numDocs - 1;
}
@Override
@ -136,25 +94,16 @@ abstract class IndexDocValuesArray extends Source {
return values[docID];
}
@Override
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
int maxDoc) throws IOException {
return new FixedIntsEnum(attrSource, input, type(),
bytesPerValue, maxDoc) {
@Override
protected final long toLong(BytesRef bytesRef) {
return bytesRef.bytes[bytesRef.offset];
}
};
}
@Override
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException {
return new ByteValues(input, numDocs);
}
void toBytes(long value, BytesRef bytesRef) {
bytesRef.bytes[0] = (byte) (0xFFL & value);
}
};
final static class ShortValues extends IndexDocValuesArray {
@ -171,7 +120,6 @@ abstract class IndexDocValuesArray extends Source {
for (int i = 0; i < values.length; i++) {
values[i] = input.readShort();
}
maxDocID = numDocs - 1;
}
@Override
@ -185,25 +133,16 @@ abstract class IndexDocValuesArray extends Source {
return values[docID];
}
@Override
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
int maxDoc) throws IOException {
return new FixedIntsEnum(attrSource, input, type(),
bytesPerValue, maxDoc) {
@Override
protected final long toLong(BytesRef bytesRef) {
return bytesRef.asShort();
}
};
}
@Override
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException {
return new ShortValues(input, numDocs);
}
void toBytes(long value, BytesRef bytesRef) {
bytesRef.copy((short) (0xFFFFL & value));
}
};
final static class IntValues extends IndexDocValuesArray {
@ -220,7 +159,6 @@ abstract class IndexDocValuesArray extends Source {
for (int i = 0; i < values.length; i++) {
values[i] = input.readInt();
}
maxDocID = numDocs - 1;
}
@Override
@ -234,24 +172,16 @@ abstract class IndexDocValuesArray extends Source {
return 0xFFFFFFFF & values[docID];
}
@Override
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
int maxDoc) throws IOException {
return new FixedIntsEnum(attrSource, input, type(),
bytesPerValue, maxDoc) {
@Override
protected final long toLong(BytesRef bytesRef) {
return bytesRef.asInt();
}
};
}
@Override
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException {
return new IntValues(input, numDocs);
}
void toBytes(long value, BytesRef bytesRef) {
bytesRef.copy((int) (0xFFFFFFFF & value));
}
};
final static class LongValues extends IndexDocValuesArray {
@ -268,7 +198,6 @@ abstract class IndexDocValuesArray extends Source {
for (int i = 0; i < values.length; i++) {
values[i] = input.readLong();
}
maxDocID = numDocs - 1;
}
@Override
@ -282,18 +211,6 @@ abstract class IndexDocValuesArray extends Source {
return values[docID];
}
@Override
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
int maxDoc) throws IOException {
return new FixedIntsEnum(attrSource, input, type(),
bytesPerValue, maxDoc) {
@Override
protected final long toLong(BytesRef bytesRef) {
return bytesRef.asLong();
}
};
}
@Override
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException {
@ -313,13 +230,13 @@ abstract class IndexDocValuesArray extends Source {
private FloatValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_FLOAT, ValueType.FLOAT_32);
values = new float[numDocs];
/* we always read BIG_ENDIAN here since the writer serialized plain bytes
* we can simply read the ints / longs
* back in using readInt / readLong */
/*
* we always read BIG_ENDIAN here since the writer serialized plain bytes
* we can simply read the ints / longs back in using readInt / readLong
*/
for (int i = 0; i < values.length; i++) {
values[i] = Float.intBitsToFloat(input.readInt());
}
maxDocID = numDocs - 1;
}
@Override
@ -332,17 +249,11 @@ abstract class IndexDocValuesArray extends Source {
assert docID >= 0 && docID < values.length;
return values[docID];
}
@Override
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
int maxDoc) throws IOException {
return new FloatsEnum(attrSource, input, type(),
bytesPerValue, maxDoc) {
@Override
protected double toDouble(BytesRef bytesRef) {
return Float.intBitsToFloat(bytesRef.asInt());
}
};
void toBytes(double value, BytesRef bytesRef) {
bytesRef.copy(Float.floatToRawIntBits((float)value));
}
@Override
@ -351,7 +262,7 @@ abstract class IndexDocValuesArray extends Source {
return new FloatValues(input, numDocs);
}
};
final static class DoubleValues extends IndexDocValuesArray {
private final double[] values;
@ -363,13 +274,13 @@ abstract class IndexDocValuesArray extends Source {
private DoubleValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_DOUBLE, ValueType.FLOAT_64);
values = new double[numDocs];
/* we always read BIG_ENDIAN here since the writer serialized plain bytes
* we can simply read the ints / longs
* back in using readInt / readLong */
/*
* we always read BIG_ENDIAN here since the writer serialized plain bytes
* we can simply read the ints / longs back in using readInt / readLong
*/
for (int i = 0; i < values.length; i++) {
values[i] = Double.longBitsToDouble(input.readLong());
}
maxDocID = numDocs - 1;
}
@Override
@ -383,79 +294,12 @@ abstract class IndexDocValuesArray extends Source {
return values[docID];
}
@Override
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
int maxDoc) throws IOException {
return new FloatsEnum(attrSource, input, type(),
bytesPerValue, maxDoc) {
@Override
protected double toDouble(BytesRef bytesRef) {
return Double.longBitsToDouble(bytesRef.asLong());
}
};
}
@Override
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException {
return new DoubleValues(input, numDocs);
}
};
private abstract static class FixedIntsEnum extends
FixedStraightBytesEnum {
private final ValueType type;
private FixedIntsEnum(AttributeSource source, IndexInput dataIn,
ValueType type, int bytesPerValue, int maxDoc) throws IOException {
super(source, dataIn, bytesPerValue, maxDoc);
this.type = type;
}
@Override
public int advance(int target) throws IOException {
final int advance = super.advance(target);
if (advance != NO_MORE_DOCS) {
intsRef.ints[0] = toLong(this.bytesRef);
}
return advance;
}
protected abstract long toLong(BytesRef bytesRef);
@Override
public ValueType type() {
return type;
}
}
private abstract static class FloatsEnum extends FixedStraightBytesEnum {
private final ValueType type;
FloatsEnum(AttributeSource source, IndexInput dataIn, ValueType type, int bytePerValue, int maxDoc)
throws IOException {
super(source, dataIn, bytePerValue, maxDoc);
this.type = type;
}
@Override
public int advance(int target) throws IOException {
final int retVal = super.advance(target);
if (retVal != NO_MORE_DOCS) {
floatsRef.floats[floatsRef.offset] = toDouble(bytesRef);
}
return retVal;
}
protected abstract double toDouble(BytesRef bytesRef);
@Override
public ValueType type() {
return type;
}
}
}

View File

@ -19,14 +19,9 @@ package org.apache.lucene.index.values;
import java.io.IOException;
import org.apache.lucene.index.values.IndexDocValuesArray.ByteValues;
import org.apache.lucene.index.values.IndexDocValuesArray.IntValues;
import org.apache.lucene.index.values.IndexDocValuesArray.LongValues;
import org.apache.lucene.index.values.IndexDocValuesArray.ShortValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
@ -37,10 +32,13 @@ import org.apache.lucene.util.IOUtils;
* @lucene.experimental
*/
public final class Ints {
protected static final String CODEC_NAME = "Ints";
protected static final int VERSION_START = 0;
protected static final int VERSION_CURRENT = VERSION_START;
private Ints() {
}
public static Writer getWriter(Directory dir, String id, Counter bytesUsed,
ValueType type, IOContext context) throws IOException {
return type == ValueType.VAR_INTS ? new PackedIntValues.PackedIntsWriter(dir, id,
@ -50,15 +48,42 @@ public final class Ints {
public static IndexDocValues getValues(Directory dir, String id, int numDocs,
ValueType type, IOContext context) throws IOException {
return type == ValueType.VAR_INTS ? new PackedIntValues.PackedIntsReader(dir, id,
numDocs, context) : new IntsReader(dir, id, numDocs, context);
numDocs, context) : new IntsReader(dir, id, numDocs, context, type);
}
/**
 * Maps a per-value width in bytes to the fixed-width integer
 * {@link ValueType} of that width.
 *
 * @param size number of bytes per value; must be 1, 2, 4 or 8
 * @return the <code>FIXED_INTS_*</code> type with the given width
 * @throws IllegalStateException if <code>size</code> is any other value
 */
private static ValueType sizeToType(int size) {
  if (size == 1) {
    return ValueType.FIXED_INTS_8;
  }
  if (size == 2) {
    return ValueType.FIXED_INTS_16;
  }
  if (size == 4) {
    return ValueType.FIXED_INTS_32;
  }
  if (size == 8) {
    return ValueType.FIXED_INTS_64;
  }
  throw new IllegalStateException("illegal size " + size);
}
/**
 * Maps a fixed-width integer {@link ValueType} to its width in bytes; the
 * inverse of the size-to-type mapping.
 *
 * @param type one of the four <code>FIXED_INTS_*</code> types
 * @return 1, 2, 4 or 8 depending on the type
 * @throws IllegalStateException if <code>type</code> is not a fixed-width
 *         integer type
 */
private static int typeToSize(ValueType type) {
  final int width;
  switch (type) {
    case FIXED_INTS_8:
      width = 1;
      break;
    case FIXED_INTS_16:
      width = 2;
      break;
    case FIXED_INTS_32:
      width = 4;
      break;
    case FIXED_INTS_64:
      width = 8;
      break;
    default:
      throw new IllegalStateException("illegal type " + type);
  }
  return width;
}
static class IntsWriter extends FixedStraightBytesImpl.Writer {
protected static final String CODEC_NAME = "Ints";
protected static final int VERSION_START = 0;
protected static final int VERSION_CURRENT = VERSION_START;
private final ValueType valueType;
static class IntsWriter extends FixedStraightBytesImpl.Writer {
private final IndexDocValuesArray template;
public IntsWriter(Directory dir, String id, Counter bytesUsed,
IOContext context, ValueType valueType) throws IOException {
@ -68,46 +93,15 @@ public final class Ints {
protected IntsWriter(Directory dir, String id, String codecName,
int version, Counter bytesUsed, IOContext context, ValueType valueType) throws IOException {
super(dir, id, codecName, version, bytesUsed, context);
this.valueType = valueType;
final int expectedSize = getSize(valueType);
final int expectedSize = typeToSize(valueType);
this.bytesRef = new BytesRef(expectedSize);
bytesRef.length = expectedSize;
template = IndexDocValuesArray.TEMPLATES.get(valueType);
}
private static int getSize(ValueType type) {
switch (type) {
case FIXED_INTS_16:
return 2;
case FIXED_INTS_32:
return 4;
case FIXED_INTS_64:
return 8;
case FIXED_INTS_8:
return 1;
default:
throw new IllegalStateException("illegal type " + type);
}
}
@Override
public void add(int docID, long v) throws IOException {
switch (valueType) {
case FIXED_INTS_64:
bytesRef.copy(v);
break;
case FIXED_INTS_32:
bytesRef.copy((int) (0xFFFFFFFF & v));
break;
case FIXED_INTS_16:
bytesRef.copy((short) (0xFFFFL & v));
break;
case FIXED_INTS_8:
bytesRef.bytes[0] = (byte) (0xFFL & v);
break;
default:
throw new IllegalStateException("illegal type " + valueType);
}
template.toBytes(v, bytesRef);
add(docID, bytesRef);
}
@ -116,72 +110,27 @@ public final class Ints {
add(docID, docValues.getInt());
}
}
final static class IntsReader extends FixedStraightBytesImpl.Reader {
private final ValueType type;
final static class IntsReader extends FixedStraightBytesImpl.FixedStraightReader {
private final IndexDocValuesArray arrayTemplate;
IntsReader(Directory dir, String id, int maxDoc, IOContext context)
IntsReader(Directory dir, String id, int maxDoc, IOContext context, ValueType type)
throws IOException {
super(dir, id, IntsWriter.CODEC_NAME, IntsWriter.VERSION_CURRENT, maxDoc,
context);
switch (size) {
case 8:
type = ValueType.FIXED_INTS_64;
arrayTemplate = new LongValues();
break;
case 4:
type = ValueType.FIXED_INTS_32;
arrayTemplate = new IntValues();
break;
case 2:
type = ValueType.FIXED_INTS_16;
arrayTemplate = new ShortValues();
break;
case 1:
type = ValueType.FIXED_INTS_8;
arrayTemplate = new ByteValues();
break;
default:
throw new IllegalStateException("illegal size: " + size);
}
super(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc,
context, type);
arrayTemplate = IndexDocValuesArray.TEMPLATES.get(type);
assert arrayTemplate != null;
assert type == sizeToType(size);
}
@Override
public Source load() throws IOException {
boolean success = false;
IndexInput input = null;
final IndexInput indexInput = cloneData();
try {
input = cloneData();
final Source source = arrayTemplate.newFromInput(input, maxDoc);
success = true;
return source;
return arrayTemplate.newFromInput(indexInput, maxDoc);
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(input, datIn);
}
IOUtils.close(indexInput);
}
}
@Override
public ValuesEnum getEnum(AttributeSource source) throws IOException {
final IndexInput input = cloneData();
boolean success = false;
try {
final ValuesEnum valuesEnum = arrayTemplate.getDirectEnum(source,
input, maxDoc);
success = true;
return valuesEnum;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(input);
}
}
}
@Override
public ValueType type() {
return type;
}
}
}

View File

@ -19,7 +19,6 @@ package org.apache.lucene.index.values;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ReaderUtil;
@ -28,6 +27,7 @@ import org.apache.lucene.util.ReaderUtil;
* {@link IndexDocValues}
*
* @lucene.experimental
* @lucene.internal
*/
public class MultiIndexDocValues extends IndexDocValues {
@ -56,14 +56,9 @@ public class MultiIndexDocValues extends IndexDocValues {
reset(docValuesIdx);
}
@Override
public ValuesEnum getEnum(AttributeSource source) throws IOException {
return new MultiValuesEnum(docValuesIdx, starts);
}
@Override
public Source load() throws IOException {
return new MultiSource(docValuesIdx, starts);
return new MultiSource(docValuesIdx, starts, false);
}
public IndexDocValues reset(DocValuesIndex[] docValuesIdx) {
@ -85,11 +80,6 @@ public class MultiIndexDocValues extends IndexDocValues {
this.emptySoruce = new EmptySource(type);
}
@Override
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
return emptySoruce.getEnum(attrSource);
}
@Override
public Source load() throws IOException {
return emptySoruce;
@ -99,69 +89,11 @@ public class MultiIndexDocValues extends IndexDocValues {
public ValueType type() {
return emptySoruce.type();
}
}
private static class MultiValuesEnum extends ValuesEnum {
private DocValuesIndex[] docValuesIdx;
private final int maxDoc;
private int currentStart;
private int currentMax;
private int currentDoc = -1;
private ValuesEnum currentEnum;
private final int[] starts;
public MultiValuesEnum(DocValuesIndex[] docValuesIdx, int[] starts)
throws IOException {
super(docValuesIdx[0].docValues.type());
this.docValuesIdx = docValuesIdx;
final DocValuesIndex last = docValuesIdx[docValuesIdx.length - 1];
maxDoc = last.start + last.length;
final DocValuesIndex idx = docValuesIdx[0];
currentEnum = idx.docValues.getEnum(this.attributes());
currentEnum.copyFrom(this);
intsRef = currentEnum.intsRef;
currentMax = idx.length;
currentStart = 0;
this.starts = starts;
}
@Override
public void close() throws IOException {
currentEnum.close();
}
@Override
public int advance(int target) throws IOException {
assert target > currentDoc : "target " + target
+ " must be > than the current doc " + currentDoc;
int relativeDoc = target - currentStart;
do {
if (target >= maxDoc) {// we are beyond max doc
return currentDoc = NO_MORE_DOCS;
}
if (target >= currentMax) {
final int idx = ReaderUtil.subIndex(target, starts);
currentEnum.close();
currentEnum = docValuesIdx[idx].docValues.getEnum();
currentEnum.copyFrom(this);
currentStart = docValuesIdx[idx].start;
currentMax = currentStart + docValuesIdx[idx].length;
relativeDoc = target - currentStart;
}
target = currentMax; // make sure that we advance to the next enum if the current is exhausted
} while ((relativeDoc = currentEnum.advance(relativeDoc)) == NO_MORE_DOCS);
return currentDoc = currentStart + relativeDoc;
}
@Override
public int docID() {
return currentDoc;
}
@Override
public int nextDoc() throws IOException {
return advance(currentDoc + 1);
public Source getDirectSource() throws IOException {
return emptySoruce;
}
}
@ -171,12 +103,14 @@ public class MultiIndexDocValues extends IndexDocValues {
private Source current;
private final int[] starts;
private final DocValuesIndex[] docValuesIdx;
private boolean direct;
public MultiSource(DocValuesIndex[] docValuesIdx, int[] starts) {
public MultiSource(DocValuesIndex[] docValuesIdx, int[] starts, boolean direct) {
super(docValuesIdx[0].docValues.type());
this.docValuesIdx = docValuesIdx;
this.starts = starts;
assert docValuesIdx.length != 0;
this.direct = direct;
}
public long getInt(int docID) {
@ -193,7 +127,11 @@ public class MultiIndexDocValues extends IndexDocValues {
+ " for doc id: " + docID + " slices : " + Arrays.toString(starts);
assert docValuesIdx[idx] != null;
try {
current = docValuesIdx[idx].docValues.getSource();
if (direct) {
current = docValuesIdx[idx].docValues.getDirectSource();
} else {
current = docValuesIdx[idx].docValues.getSource();
}
} catch (IOException e) {
throw new RuntimeException("load failed", e); // TODO how should we
// handle this
@ -214,24 +152,12 @@ public class MultiIndexDocValues extends IndexDocValues {
final int doc = ensureSource(docID);
return current.getBytes(doc, bytesRef);
}
@Override
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
throw new UnsupportedOperationException(); // TODO
}
@Override
public ValueType type() {
return docValuesIdx[0].docValues.type();
}
}
private static class EmptySource extends Source {
private final ValueType type;
public EmptySource(ValueType type) {
this.type = type;
super(type);
}
@Override
@ -250,20 +176,15 @@ public class MultiIndexDocValues extends IndexDocValues {
public long getInt(int docID) {
return 0;
}
@Override
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
return ValuesEnum.emptyEnum(type);
}
@Override
public ValueType type() {
return type;
}
}
@Override
public ValueType type() {
return this.docValuesIdx[0].docValues.type();
}
/**
 * Returns a new {@link MultiSource} over the same <code>docValuesIdx</code>
 * and <code>starts</code> as {@link #load()}, but created with the
 * <code>direct</code> flag set so that each slice is resolved through
 * <code>getDirectSource()</code> rather than <code>getSource()</code>.
 */
@Override
public Source getDirectSource() throws IOException {
return new MultiSource(docValuesIdx, starts, true);
}
}

View File

@ -21,18 +21,15 @@ import java.io.IOException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.values.FixedStraightBytesImpl.FixedBytesWriterBase;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
import org.apache.lucene.index.values.IndexDocValuesArray.LongValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.packed.PackedInts;
/**
@ -51,7 +48,6 @@ class PackedIntValues {
static class PackedIntsWriter extends FixedBytesWriterBase {
private LongsRef intsRef;
private long minValue;
private long maxValue;
private boolean started;
@ -114,10 +110,10 @@ class PackedIntValues {
}
@Override
protected void mergeDoc(int docID) throws IOException {
protected void mergeDoc(int docID, int sourceDoc) throws IOException {
assert docID > lastDocId : "docID: " + docID
+ " must be greater than the last added doc id: " + lastDocId;
add(docID, intsRef.get());
add(docID, currentMergeSource.getInt(sourceDoc));
}
private void writePackedInts(IndexOutput datOut, int docCount) throws IOException {
@ -139,12 +135,6 @@ class PackedIntValues {
w.add(defaultValue);
}
w.finish();
w.finish();
}
@Override
protected void setNextEnum(ValuesEnum valuesEnum) {
intsRef = valuesEnum.getInt();
}
@Override
@ -215,30 +205,17 @@ class PackedIntValues {
datIn.close();
}
@Override
public ValuesEnum getEnum(AttributeSource source) throws IOException {
final IndexInput input = (IndexInput) datIn.clone();
boolean success = false;
try {
final ValuesEnum inst;
if (values == null) {
inst = new PackedIntsEnumImpl(source, input);
} else {
inst = values.getDirectEnum(source, input, numDocs);
}
success = true;
return inst;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(input);
}
}
}
@Override
public ValueType type() {
return ValueType.VAR_INTS;
}
/**
 * Opens a source on a fresh clone of <code>datIn</code>. When
 * <code>values</code> is <code>null</code> the clone is wrapped in a
 * {@link DirectPackedIntsSource}; otherwise it is read as fixed 8-byte
 * values typed {@link ValueType#FIXED_INTS_64}.
 * NOTE(review): <code>values</code> is presumably non-null when the data was
 * written as plain 64-bit longs instead of packed ints -- confirm against
 * the reader's constructor.
 */
@Override
public Source getDirectSource() throws IOException {
  if (values == null) {
    return new DirectPackedIntsSource((IndexInput) datIn.clone());
  }
  return new FixedStraightBytesImpl.DirectFixedStraightSource(
      (IndexInput) datIn.clone(), 8, ValueType.FIXED_INTS_64);
}
}
@ -248,7 +225,7 @@ class PackedIntValues {
private final PackedInts.Reader values;
public PackedIntsSource(IndexInput dataIn) throws IOException {
super(ValueType.VAR_INTS);
minValue = dataIn.readLong();
defaultValue = dataIn.readLong();
values = PackedInts.getReader(dataIn);
@ -263,72 +240,41 @@ class PackedIntValues {
final long value = values.get(docID);
return value == defaultValue ? 0 : minValue + value;
}
@Override
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
return new SourceEnum(attrSource, type(), this, values.size()) {
@Override
public int advance(int target) throws IOException {
if (target >= numDocs)
return pos = NO_MORE_DOCS;
intsRef.ints[intsRef.offset] = source.getInt(target);
return pos = target;
}
};
}
@Override
public ValueType type() {
return ValueType.VAR_INTS;
}
}
private static final class PackedIntsEnumImpl extends ValuesEnum {
private final PackedInts.ReaderIterator ints;
private static final class DirectPackedIntsSource extends Source {
private final PackedInts.RandomAccessReaderIterator ints;
private long minValue;
private final IndexInput dataIn;
private final long defaultValue;
private final int maxDoc;
private int pos = -1;
private PackedIntsEnumImpl(AttributeSource source, IndexInput dataIn)
private DirectPackedIntsSource(IndexInput dataIn)
throws IOException {
super(source, ValueType.VAR_INTS);
intsRef.offset = 0;
this.dataIn = dataIn;
super(ValueType.VAR_INTS);
minValue = dataIn.readLong();
defaultValue = dataIn.readLong();
this.ints = PackedInts.getReaderIterator(dataIn);
maxDoc = ints.size();
this.ints = PackedInts.getRandomAccessReaderIterator(dataIn);
}
@Override
public void close() throws IOException {
ints.close();
dataIn.close();
public double getFloat(int docID) {
return getInt(docID);
}
@Override
public int advance(int target) throws IOException {
if (target >= maxDoc) {
return pos = NO_MORE_DOCS;
public BytesRef getBytes(int docID, BytesRef ref) {
ref.grow(8);
ref.copy(getInt(docID));
return ref;
}
@Override
public long getInt(int docID) {
try {
final long val = ints.get(docID);
return val == defaultValue ? 0 : minValue + val;
} catch (IOException e) {
throw new RuntimeException(e);
}
final long val = ints.advance(target);
intsRef.ints[intsRef.offset] = val == defaultValue ? 0 : minValue + val;
return pos = target;
}
@Override
public int docID() {
return pos;
}
@Override
public int nextDoc() throws IOException {
if (pos >= maxDoc) {
return pos = NO_MORE_DOCS;
}
return advance(pos + 1);
}
}

View File

@ -18,36 +18,29 @@ package org.apache.lucene.index.values;
*/
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.values.IndexDocValues.SortedSource;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.util.BytesRef;
/**
* Abstract base class for {@link IndexDocValues} {@link Source} /
* {@link SortedSource} cache.
* Abstract base class for {@link IndexDocValues} {@link Source} cache.
* <p>
* {@link Source} and {@link SortedSource} instances loaded via
* {@link IndexDocValues#load()} and {@link IndexDocValues#loadSorted(Comparator)} are
* entirely memory resident and need to be maintained by the caller. Each call
* to {@link IndexDocValues#load()} or {@link IndexDocValues#loadSorted(Comparator)} will
* cause an entire reload of the underlying data. Source and
* {@link SortedSource} instances obtained from {@link IndexDocValues#getSource()}
* and {@link IndexDocValues#getSource()} respectively are maintained by a
* {@link SourceCache} that is closed ({@link #close(IndexDocValues)}) once the
* {@link IndexReader} that created the {@link IndexDocValues} instance is closed.
* {@link Source} instances loaded via {@link IndexDocValues#load()} are entirely memory resident
* and need to be maintained by the caller. Each call to
* {@link IndexDocValues#load()} will cause an entire reload of
* the underlying data. Source instances obtained from
* {@link IndexDocValues#getSource()} are maintained by a
* {@link SourceCache} that is closed (
* {@link #close(IndexDocValues)}) once the {@link IndexReader} that created the
* {@link IndexDocValues} instance is closed.
* <p>
* Unless {@link Source} and {@link SortedSource} instances are managed by
* another entity it is recommended to use the cached variants to obtain a
* source instance.
* Unless {@link Source} instances are managed by another entity it is
* recommended to use the cached variants to obtain a source instance.
* <p>
* Implementation of this API must be thread-safe.
*
* @see IndexDocValues#setCache(SourceCache)
* @see IndexDocValues#getSource()
* @see IndexDocValues#getSortedSorted(Comparator)
*
* @lucene.experimental
*/
@ -63,17 +56,7 @@ public abstract class SourceCache {
public abstract Source load(IndexDocValues values) throws IOException;
/**
* Atomically loads a {@link SortedSource} into the cache from the given
* {@link IndexDocValues} and returns it iff no other {@link SortedSource} has
* already been cached. Otherwise the cached source is returned.
* <p>
* This method will not return <code>null</code>
*/
public abstract SortedSource loadSorted(IndexDocValues values,
Comparator<BytesRef> comp) throws IOException;
/**
* Atomically invalidates the cached {@link Source} and {@link SortedSource}
* Atomically invalidates the cached {@link Source}
* instances if any and empties the cache.
*/
public abstract void invalidate(IndexDocValues values);
@ -87,14 +70,13 @@ public abstract class SourceCache {
/**
* Simple per {@link IndexDocValues} instance cache implementation that holds a
* {@link Source} and {@link SortedSource} reference as a member variable.
* {@link Source} reference as a member variable.
* <p>
* If a {@link DirectSourceCache} instance is closed or invalidated the cached
* reference is simply set to <code>null</code>
*/
public static final class DirectSourceCache extends SourceCache {
private Source ref;
private SortedSource sortedRef;
public synchronized Source load(IndexDocValues values) throws IOException {
if (ref == null) {
@ -103,17 +85,8 @@ public abstract class SourceCache {
return ref;
}
public synchronized SortedSource loadSorted(IndexDocValues values,
Comparator<BytesRef> comp) throws IOException {
if (sortedRef == null) {
sortedRef = values.loadSorted(comp);
}
return sortedRef;
}
public synchronized void invalidate(IndexDocValues values) {
ref = null;
sortedRef = null;
}
}

View File

@ -43,9 +43,8 @@ public enum ValueType {
* <p>
* NOTE: this type uses <tt>0</tt> as the default value without any
* distinction between provided <tt>0</tt> values during indexing. All
* documents without an explicit value will use <tt>0</tt> instead. In turn,
* {@link ValuesEnum} instances will not skip documents without an explicit
* value assigned. Custom default values must be assigned explicitly.
* documents without an explicit value will use <tt>0</tt> instead.
* Custom default values must be assigned explicitly.
* </p>
*/
VAR_INTS,
@ -56,9 +55,8 @@ public enum ValueType {
* <p>
* NOTE: this type uses <tt>0</tt> as the default value without any
* distinction between provided <tt>0</tt> values during indexing. All
* documents without an explicit value will use <tt>0</tt> instead. In turn,
* {@link ValuesEnum} instances will not skip documents without an explicit
* value assigned. Custom default values must be assigned explicitly.
* documents without an explicit value will use <tt>0</tt> instead.
* Custom default values must be assigned explicitly.
* </p>
*/
FIXED_INTS_8,
@ -69,9 +67,8 @@ public enum ValueType {
* <p>
* NOTE: this type uses <tt>0</tt> as the default value without any
* distinction between provided <tt>0</tt> values during indexing. All
* documents without an explicit value will use <tt>0</tt> instead. In turn,
* {@link ValuesEnum} instances will not skip documents without an explicit
* value assigned. Custom default values must be assigned explicitly.
* documents without an explicit value will use <tt>0</tt> instead.
* Custom default values must be assigned explicitly.
* </p>
*/
FIXED_INTS_16,
@ -82,9 +79,8 @@ public enum ValueType {
* <p>
* NOTE: this type uses <tt>0</tt> as the default value without any
* distinction between provided <tt>0</tt> values during indexing. All
* documents without an explicit value will use <tt>0</tt> instead. In turn,
* {@link ValuesEnum} instances will not skip documents without an explicit
* value assigned. Custom default values must be assigned explicitly.
* documents without an explicit value will use <tt>0</tt> instead.
* Custom default values must be assigned explicitly.
* </p>
*/
FIXED_INTS_32,
@ -95,9 +91,8 @@ public enum ValueType {
* <p>
* NOTE: this type uses <tt>0</tt> as the default value without any
* distinction between provided <tt>0</tt> values during indexing. All
* documents without an explicit value will use <tt>0</tt> instead. In turn,
* {@link ValuesEnum} instances will not skip documents without an explicit
* value assigned. Custom default values must be assigned explicitly.
* documents without an explicit value will use <tt>0</tt> instead.
* Custom default values must be assigned explicitly.
* </p>
*/
FIXED_INTS_64,
@ -110,9 +105,8 @@ public enum ValueType {
* <p>
* NOTE: this type uses <tt>0.0f</tt> as the default value without any
* distinction between provided <tt>0.0f</tt> values during indexing. All
* documents without an explicit value will use <tt>0.0f</tt> instead. In
* turn, {@link ValuesEnum} instances will not skip documents without an
* explicit value assigned. Custom default values must be assigned explicitly.
* documents without an explicit value will use <tt>0.0f</tt> instead.
* Custom default values must be assigned explicitly.
* </p>
*/
FLOAT_32,
@ -126,9 +120,8 @@ public enum ValueType {
* <p>
* NOTE: this type uses <tt>0.0d</tt> as the default value without any
* distinction between provided <tt>0.0d</tt> values during indexing. All
* documents without an explicit value will use <tt>0.0d</tt> instead. In
* turn, {@link ValuesEnum} instances will not skip documents without an
* explicit value assigned. Custom default values must be assigned explicitly.
* documents without an explicit value will use <tt>0.0d</tt> instead.
* Custom default values must be assigned explicitly.
* </p>
*/
FLOAT_64,
@ -143,9 +136,7 @@ public enum ValueType {
* NOTE: this type uses <tt>0 byte</tt> filled byte[] based on the length of the first seen
* value as the default value without any distinction between explicitly
* provided values during indexing. All documents without an explicit value
* will use the default instead. In turn, {@link ValuesEnum} instances will
* not skip documents without an explicit value assigned. Custom default
* values must be assigned explicitly.
* will use the default instead. Custom default values must be assigned explicitly.
* </p>
*/
BYTES_FIXED_STRAIGHT,
@ -159,33 +150,11 @@ public enum ValueType {
* NOTE: Fields of this type will not store values for documents without an
* explicitly provided value. If a document's value is accessed while no
* explicit value is stored the returned {@link BytesRef} will be a 0-length
* reference. In turn, {@link ValuesEnum} instances will skip over documents
* without an explicit value assigned. Custom default values must be assigned
* explicitly.
* reference. Custom default values must be assigned explicitly.
* </p>
*/
BYTES_FIXED_DEREF,
/**
* A fixed length pre-sorted byte[] variant. Fields with this type only
* store distinct byte values and store an additional offset pointer per
* document to dereference the shared byte[]. The stored
* byte[] is presorted, by default by unsigned byte order,
* and allows access via document id, ordinal and by-value.
* Use this type if your documents may share the same byte[].
* <p>
* NOTE: Fields of this type will not store values for documents without and
* explicitly provided value. If a documents value is accessed while no
* explicit value is stored the returned {@link BytesRef} will be a 0-length
* reference. In turn, {@link ValuesEnum} instances will skip over documents
* without an explicit value assigned. Custom default values must be assigned
* explicitly.
* </p>
*
* @see SortedSource
*/
BYTES_FIXED_SORTED,
/**
* Variable length straight stored byte[] variant. All bytes are
* stored sequentially for compactness. Usage of this type via the
@ -195,9 +164,7 @@ public enum ValueType {
* NOTE: Fields of this type will not store values for documents without an
* explicitly provided value. If a document's value is accessed while no
* explicit value is stored the returned {@link BytesRef} will be a 0-length
* byte[] reference. In contrast to dereferenced variants, {@link ValuesEnum}
* instances will <b>not</b> skip over documents without an explicit value
* assigned. Custom default values must be assigned explicitly.
* byte[] reference. Custom default values must be assigned explicitly.
* </p>
*/
BYTES_VAR_STRAIGHT,
@ -210,13 +177,12 @@ public enum ValueType {
* NOTE: Fields of this type will not store values for documents without an
* explicitly provided value. If a document's value is accessed while no
* explicit value is stored the returned {@link BytesRef} will be a 0-length
* reference. In turn, {@link ValuesEnum} instances will skip over documents
* without an explicit value assigned. Custom default values must be assigned
* explicitly.
* reference. Custom default values must be assigned explicitly.
* </p>
*/
BYTES_VAR_DEREF,
/**
* A variable length pre-sorted byte[] variant. Just like
* {@link #BYTES_FIXED_SORTED}, but allowing each
@ -225,12 +191,30 @@ public enum ValueType {
* NOTE: Fields of this type will not store values for documents without an
* explicitly provided value. If a document's value is accessed while no
* explicit value is stored the returned {@link BytesRef} will be a 0-length
* reference. In turn, {@link ValuesEnum} instances will skip over documents
* without an explicit value assigned. Custom default values must be assigned
* reference. Custom default values must be assigned explicitly.
* </p>
*
* @see SortedSource
*/
BYTES_VAR_SORTED,
/**
* A fixed length pre-sorted byte[] variant. Fields with this type only
* store distinct byte values and store an additional offset pointer per
* document to dereference the shared byte[]. The stored
* byte[] is presorted, by default by unsigned byte order,
* and allows access via document id, ordinal and by-value.
* Use this type if your documents may share the same byte[].
* <p>
* NOTE: Fields of this type will not store values for documents without an
* explicitly provided value. If a document's value is accessed while no
* explicit value is stored the returned {@link BytesRef} will be a 0-length
* reference. Custom default values must be assigned
* explicitly.
* </p>
*
* @see SortedSource
*/
BYTES_VAR_SORTED
BYTES_FIXED_SORTED
}

View File

@ -1,156 +0,0 @@
package org.apache.lucene.index.values;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FloatsRef;
import org.apache.lucene.util.LongsRef;
/**
* {@link ValuesEnum} is a {@link DocIdSetIterator} iterating <tt>byte[]</tt>
* , <tt>long</tt> and <tt>double</tt> stored per document. Depending on the
* enum's {@link ValueType} ({@link #type()}) the enum might skip over documents that
* have no value stored. Types like {@link ValueType#BYTES_VAR_STRAIGHT} might not
* skip over documents even if there is no value associated with a document. The
* value for document without values again depends on the types implementation
* although a reference for a {@link ValueType} returned from an accessor method
* {@link #getFloat()}, {@link #getInt()} or {@link #bytes()} will never be
* <code>null</code> even if a document has no value.
* <p>
* Note: Only the reference for the enum's type are initialized to non
* <code>null</code> ie. {@link #getInt()} will always return <code>null</code>
* if the enum's Type is {@link ValueType#FLOAT_32}.
*
* @lucene.experimental
*/
public abstract class ValuesEnum extends DocIdSetIterator {
// Attribute source for this enum; may be null until attributes() creates one.
private AttributeSource source;
// Value type this enum was created for; fixed at construction time.
private final ValueType enumType;
// Value holders handed out by bytes(), getFloat() and getInt(). Each is
// constructed with an argument of 1 (presumably an initial capacity - TODO confirm).
protected BytesRef bytesRef = new BytesRef(1);
protected FloatsRef floatsRef = new FloatsRef(1);
protected LongsRef intsRef = new LongsRef(1);
/**
* Creates a new {@link ValuesEnum} for the given type. The
* {@link AttributeSource} for this enum is initially <code>null</code>; one
* is created on demand by {@link #attributes()}.
*
* @param enumType
*          the {@link ValueType} reported by {@link #type()}
*/
protected ValuesEnum(ValueType enumType) {
this(null, enumType);
}
/**
* Creates a new {@link ValuesEnum} for the given type.
*
* @param source
*          the {@link AttributeSource} to associate with this enum, or
*          <code>null</code> to have one created lazily by
*          {@link #attributes()}
* @param enumType
*          the {@link ValueType} reported by {@link #type()}
*/
protected ValuesEnum(AttributeSource source, ValueType enumType) {
this.source = source;
this.enumType = enumType;
}
/**
* Returns the type of this enum, as passed to the constructor.
*/
public ValueType type() {
return enumType;
}
/**
* Returns the {@link BytesRef} held by this enum, or <code>null</code> if
* this enum doesn't enumerate byte[] values.
* <p>
* NOTE(review): this base class always initializes the field to a non-null
* reference, so a <code>null</code> result presumably requires a subclass to
* reset it - confirm against the implementations.
*/
public BytesRef bytes() {
return bytesRef;
}
/**
* Returns the {@link FloatsRef} held by this enum, or <code>null</code> if
* this enum doesn't enumerate floating point values (see the review note on
* {@link #bytes()} regarding the <code>null</code> case).
*/
public FloatsRef getFloat() {
return floatsRef;
}
/**
* Returns the {@link LongsRef} held by this enum, or <code>null</code> if
* this enum doesn't enumerate integer values (see the review note on
* {@link #bytes()} regarding the <code>null</code> case).
*/
public LongsRef getInt() {
return intsRef;
}
/**
* Copies the internal state from the given enum. The value references and
* the {@link AttributeSource} are shared with the given enum rather than
* duplicated, so both enums refer to the same objects afterwards. The
* {@link ValueType} of this enum is not changed.
*
* @param valuesEnum
*          the enum whose references are taken over
*/
protected void copyFrom(ValuesEnum valuesEnum) {
intsRef = valuesEnum.intsRef;
floatsRef = valuesEnum.floatsRef;
bytesRef = valuesEnum.bytesRef;
source = valuesEnum.source;
}
/**
* Returns the {@link AttributeSource} associated with this enum.
* <p>
* Note: this method creates a new {@link AttributeSource} on first call if
* none has been provided during enum creation; later calls return the same
* instance.
*/
public AttributeSource attributes() {
if (source == null) {
source = new AttributeSource();
}
return source;
}
/**
* Closes the enum
*
* @throws IOException
* if an {@link IOException} occurs
*/
public abstract void close() throws IOException;
/**
* Returns an empty {@link ValuesEnum} for the given {@link ValueType}. The
* returned enum is exhausted from the start: {@link #nextDoc()},
* {@link #advance(int)} and {@link #docID()} all return
* {@link #NO_MORE_DOCS}, and {@link #close()} does nothing.
*
* @param type
*          the {@link ValueType} the empty enum reports from {@link #type()}
*/
public static ValuesEnum emptyEnum(ValueType type) {
return new ValuesEnum(type) {
@Override
public int nextDoc() throws IOException {
return NO_MORE_DOCS;
}
@Override
public int docID() {
// reports exhaustion even before any call to nextDoc()/advance()
return NO_MORE_DOCS;
}
@Override
public int advance(int target) throws IOException {
return NO_MORE_DOCS;
}
@Override
public void close() throws IOException {
// nothing to release
}
};
}
}

View File

@ -20,16 +20,17 @@ package org.apache.lucene.index.values;
import java.io.IOException;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
import org.apache.lucene.index.values.Bytes.DerefBytesSourceBase;
import org.apache.lucene.index.values.Bytes.DerefBytesEnumBase;
import org.apache.lucene.index.values.Bytes.BytesSourceBase;
import org.apache.lucene.index.values.Bytes.DerefBytesWriterBase;
import org.apache.lucene.index.values.DirectSource;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.PackedInts;
// Stores variable-length byte[] by deref, ie when two docs
// have the same value, they store only 1 byte[] and both
@ -57,6 +58,7 @@ class VarDerefBytesImpl {
public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
size = 0;
}
@Override
@ -68,88 +70,82 @@ class VarDerefBytesImpl {
// some last docs that we didn't see
@Override
public void finishInternal(int docCount) throws IOException {
fillDefault(docCount);
final int size = hash.size();
final long[] addresses = new long[size+1];
final long[] addresses = new long[size];
final IndexOutput datOut = getOrCreateDataOut();
int addr = 1;
int addr = 0;
final BytesRef bytesRef = new BytesRef();
for (int i = 0; i < size; i++) {
hash.get(i, bytesRef);
addresses[i+1] = addr;
addresses[i] = addr;
addr += writePrefixLength(datOut, bytesRef) + bytesRef.length;
datOut.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
}
final IndexOutput idxOut = getOrCreateIndexOut();
// write the max address to read directly on source load
idxOut.writeLong(addr - 1);
writeIndex(idxOut, docCount, addresses[size], addresses, docToEntry);
idxOut.writeLong(addr);
writeIndex(idxOut, docCount, addresses[addresses.length-1], addresses, docToEntry);
}
}
public static class Reader extends BytesReaderBase {
public static class VarDerefReader extends BytesReaderBase {
private final long totalBytes;
Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_START, true, context);
VarDerefReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_START, true, context, ValueType.BYTES_VAR_DEREF);
totalBytes = idxIn.readLong();
}
@Override
public Source load() throws IOException {
return new Source(cloneData(), cloneIndex(), totalBytes);
return new VarDerefSource(cloneData(), cloneIndex(), totalBytes);
}
@Override
public Source getDirectSource()
throws IOException {
return new DirectVarDerefSource(cloneData(), cloneIndex(), type());
}
}
final static class VarDerefSource extends BytesSourceBase {
private final PackedInts.Reader addresses;
private final static class Source extends DerefBytesSourceBase {
public Source(IndexInput datIn, IndexInput idxIn, long totalBytes)
throws IOException {
super(datIn, idxIn, totalBytes, ValueType.BYTES_VAR_DEREF);
}
@Override
public BytesRef getBytes(int docID, BytesRef bytesRef) {
long address = addresses.get(docID);
bytesRef.length = 0;
return address == 0 ? bytesRef : data.fillSliceWithPrefix(bytesRef,
--address);
}
public VarDerefSource(IndexInput datIn, IndexInput idxIn, long totalBytes)
throws IOException {
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), totalBytes,
ValueType.BYTES_VAR_DEREF);
addresses = PackedInts.getReader(idxIn);
}
@Override
public ValuesEnum getEnum(AttributeSource source) throws IOException {
return new VarDerefBytesEnum(source, cloneData(), cloneIndex());
public BytesRef getBytes(int docID, BytesRef bytesRef) {
return data.fillSliceWithPrefix(bytesRef,
addresses.get(docID));
}
}
final static class VarDerefBytesEnum extends DerefBytesEnumBase {
public VarDerefBytesEnum(AttributeSource source, IndexInput datIn,
IndexInput idxIn) throws IOException {
super(source, datIn, idxIn, -1, ValueType.BYTES_VAR_DEREF);
}
final static class DirectVarDerefSource extends DirectSource {
private final PackedInts.RandomAccessReaderIterator index;
@Override
protected void fill(long address, BytesRef ref) throws IOException {
datIn.seek(fp + --address);
final byte sizeByte = datIn.readByte();
final int size;
if ((sizeByte & 128) == 0) {
// length is 1 byte
size = sizeByte;
} else {
size = ((sizeByte & 0x7f) << 8) | ((datIn.readByte() & 0xff));
}
if (ref.bytes.length < size) {
ref.grow(size);
}
ref.length = size;
ref.offset = 0;
datIn.readBytes(ref.bytes, 0, size);
}
DirectVarDerefSource(IndexInput data, IndexInput index, ValueType type)
throws IOException {
super(data, type);
this.index = PackedInts.getRandomAccessReaderIterator(index);
}
@Override
public ValueType type() {
return ValueType.BYTES_VAR_DEREF;
protected int position(int docID) throws IOException {
data.seek(baseOffset + index.get(docID));
final byte sizeByte = data.readByte();
if ((sizeByte & 128) == 0) {
// length is 1 byte
return sizeByte;
} else {
return ((sizeByte & 0x7f) << 8) | ((data.readByte() & 0xff));
}
}
}
}

View File

@ -23,11 +23,11 @@ import java.util.Comparator;
import org.apache.lucene.index.values.Bytes.BytesSortedSourceBase;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
import org.apache.lucene.index.values.Bytes.DerefBytesWriterBase;
import org.apache.lucene.index.values.IndexDocValues.SortedSource;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.packed.PackedInts;
@ -39,7 +39,7 @@ import org.apache.lucene.util.packed.PackedInts;
/**
* @lucene.experimental
*/
class VarSortedBytesImpl {
final class VarSortedBytesImpl {
static final String CODEC_NAME = "VarDerefBytes";
static final int VERSION_START = 0;
@ -52,8 +52,9 @@ class VarSortedBytesImpl {
Counter bytesUsed, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
this.comp = comp;
size = 0;
}
@Override
protected void checkSize(BytesRef bytes) {
// allow var bytes sizes
@ -63,11 +64,11 @@ class VarSortedBytesImpl {
// some last docs that we didn't see
@Override
public void finishInternal(int docCount) throws IOException {
fillDefault(docCount);
final int count = hash.size();
final IndexOutput datOut = getOrCreateDataOut();
long offset = 0;
long lastOffset = 0;
final int[] index = new int[count+1];
final int[] index = new int[count];
final long[] offsets = new long[count];
final int[] sortedEntries = hash.sort(comp);
// first dump bytes data, recording index & offset as
@ -75,173 +76,125 @@ class VarSortedBytesImpl {
for (int i = 0; i < count; i++) {
final int e = sortedEntries[i];
offsets[i] = offset;
index[e+1] = 1 + i;
index[e] = i;
final BytesRef bytes = hash.get(e, new BytesRef());
// TODO: we could prefix code...
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
lastOffset = offset;
offset += bytes.length;
}
final IndexOutput idxOut = getOrCreateIndexOut();
// total bytes of data
idxOut.writeLong(offset);
// write index -- first doc -> 1+ord
// write index
writeIndex(idxOut, docCount, count, index, docToEntry);
// next ord (0-based) -> offset
PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count,
PackedInts.bitsRequired(lastOffset));
PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count+1,
PackedInts.bitsRequired(offset));
for (int i = 0; i < count; i++) {
offsetWriter.add(offsets[i]);
}
offsetWriter.add(offset);
offsetWriter.finish();
}
}
public static class Reader extends BytesReaderBase {
private final Comparator<BytesRef> defaultComp;
private final Comparator<BytesRef> comparator;
Reader(Directory dir, String id, int maxDoc,
Comparator<BytesRef> comparator, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_START, true, context);
this.defaultComp = comparator;
IOContext context, ValueType type, Comparator<BytesRef> comparator)
throws IOException {
super(dir, id, CODEC_NAME, VERSION_START, true, context, type);
this.comparator = comparator;
}
@Override
public org.apache.lucene.index.values.IndexDocValues.Source load()
throws IOException {
return loadSorted(defaultComp);
return new VarSortedSource(cloneData(), cloneIndex(), comparator);
}
@Override
public SortedSource loadSorted(Comparator<BytesRef> comp)
throws IOException {
IndexInput indexIn = cloneIndex();
return new Source(cloneData(), indexIn, comp, indexIn.readLong());
public Source getDirectSource() throws IOException {
return new DirectSortedSource(cloneData(), cloneIndex(), comparator, type());
}
}
private static final class VarSortedSource extends BytesSortedSourceBase {
private final PackedInts.Reader ordToOffsetIndex; // 0-based
private final int valueCount;
VarSortedSource(IndexInput datIn, IndexInput idxIn,
Comparator<BytesRef> comp) throws IOException {
super(datIn, idxIn, comp, idxIn.readLong(), ValueType.BYTES_VAR_SORTED);
ordToOffsetIndex = PackedInts.getReader(idxIn);
valueCount = ordToOffsetIndex.size()-1; // the last value here is just a dummy value to get the length of the last value
closeIndexInput();
}
private static class Source extends BytesSortedSourceBase {
private final PackedInts.Reader ordToOffsetIndex; // 0-based
private final long totBytes;
private final int valueCount;
@Override
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
final long offset = ordToOffsetIndex.get(ord);
final long nextOffset = ordToOffsetIndex.get(1 + ord);
data.fillSlice(bytesRef, offset, (int) (nextOffset - offset));
return bytesRef;
}
public Source(IndexInput datIn, IndexInput idxIn,
Comparator<BytesRef> comp, long dataLength) throws IOException {
super(datIn, idxIn, comp, dataLength, ValueType.BYTES_VAR_SORTED);
totBytes = dataLength;
ordToOffsetIndex = PackedInts.getReader(idxIn);
valueCount = ordToOffsetIndex.size();
closeIndexInput();
}
@Override
public int getValueCount() {
return valueCount;
}
}
@Override
public int getByValue(BytesRef bytes, BytesRef tmpRef) {
return binarySearch(bytes, tmpRef, 0, valueCount - 1);
}
@Override
public int getValueCount() {
return valueCount;
}
private static final class DirectSortedSource extends SortedSource {
private final PackedInts.Reader docToOrdIndex;
private final PackedInts.RandomAccessReaderIterator ordToOffsetIndex;
private final IndexInput datIn;
private final long basePointer;
private final int valueCount;
DirectSortedSource(IndexInput datIn, IndexInput idxIn,
Comparator<BytesRef> comparator, ValueType type) throws IOException {
super(type, comparator);
idxIn.readLong();
docToOrdIndex = PackedInts.getReader(idxIn); // read the ords in to prevent too many random disk seeks
ordToOffsetIndex = PackedInts.getRandomAccessReaderIterator(idxIn);
valueCount = ordToOffsetIndex.size()-1; // the last value here is just a dummy value to get the length of the last value
basePointer = datIn.getFilePointer();
this.datIn = datIn;
}
// ord is 0-based
@Override
protected BytesRef deref(int ord, BytesRef bytesRef) {
final long nextOffset;
if (ord == valueCount - 1) {
nextOffset = totBytes;
} else {
nextOffset = ordToOffsetIndex.get(1 + ord);
}
@Override
public int ord(int docID) {
return (int) docToOrdIndex.get(docID);
}
@Override
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
try {
final long offset = ordToOffsetIndex.get(ord);
data.fillSlice(bytesRef, offset, (int) (nextOffset - offset));
return bytesRef;
}
}
@Override
public ValuesEnum getEnum(AttributeSource source) throws IOException {
return new VarSortedBytesEnum(source, cloneData(), cloneIndex());
}
private static class VarSortedBytesEnum extends ValuesEnum {
private PackedInts.Reader docToOrdIndex;
private PackedInts.Reader ordToOffsetIndex;
private IndexInput idxIn;
private IndexInput datIn;
private int valueCount;
private long totBytes;
private int docCount;
private int pos = -1;
private final long fp;
protected VarSortedBytesEnum(AttributeSource source, IndexInput datIn,
IndexInput idxIn) throws IOException {
super(source, ValueType.BYTES_VAR_SORTED);
totBytes = idxIn.readLong();
// keep that in memory to prevent lots of disk seeks
docToOrdIndex = PackedInts.getReader(idxIn);
ordToOffsetIndex = PackedInts.getReader(idxIn);
valueCount = ordToOffsetIndex.size();
docCount = docToOrdIndex.size();
fp = datIn.getFilePointer();
this.idxIn = idxIn;
this.datIn = datIn;
}
@Override
public void close() throws IOException {
idxIn.close();
datIn.close();
}
@Override
public int advance(int target) throws IOException {
if (target >= docCount) {
return pos = NO_MORE_DOCS;
}
int ord;
while ((ord = (int) docToOrdIndex.get(target)) == 0) {
if (++target >= docCount) {
return pos = NO_MORE_DOCS;
}
}
final long offset = ordToOffsetIndex.get(--ord);
final long nextOffset;
if (ord == valueCount - 1) {
nextOffset = totBytes;
} else {
nextOffset = ordToOffsetIndex.get(1 + ord);
}
final long nextOffset = ordToOffsetIndex.next();
datIn.seek(basePointer + offset);
final int length = (int) (nextOffset - offset);
datIn.seek(fp + offset);
if (bytesRef.bytes.length < length)
if (bytesRef.bytes.length < length) {
bytesRef.grow(length);
}
datIn.readBytes(bytesRef.bytes, 0, length);
bytesRef.length = length;
bytesRef.offset = 0;
return pos = target;
}
return bytesRef;
} catch (IOException ex) {
throw new IllegalStateException("failed", ex);
@Override
public int docID() {
return pos;
}
@Override
public int nextDoc() throws IOException {
if (pos >= docCount) {
return pos = NO_MORE_DOCS;
}
return advance(pos + 1);
}
}
@Override
public ValueType type() {
return ValueType.BYTES_VAR_SORTED;
public int getValueCount() {
return valueCount;
}
}
}

View File

@ -20,18 +20,19 @@ package org.apache.lucene.index.values;
import java.io.IOException;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
import org.apache.lucene.index.values.Bytes.BytesSourceBase;
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
import org.apache.lucene.index.values.Bytes.DerefBytesSourceBase;
import org.apache.lucene.index.values.DirectSource;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
import org.apache.lucene.util.packed.PackedInts;
@ -66,7 +67,7 @@ class VarStraightBytesImpl {
}
// Fills up to but not including this docID
private void fill(final int docID) {
private void fill(final int docID, final long nextAddress) {
if (docID >= docToAddress.length) {
int oldSize = docToAddress.length;
docToAddress = ArrayUtil.grow(docToAddress, 1 + docID);
@ -74,7 +75,7 @@ class VarStraightBytesImpl {
* RamUsageEstimator.NUM_BYTES_INT);
}
for (int i = lastDocID + 1; i < docID; i++) {
docToAddress[i] = address;
docToAddress[i] = nextAddress;
}
}
@ -84,7 +85,7 @@ class VarStraightBytesImpl {
if (bytes.length == 0) {
return; // default
}
fill(docID);
fill(docID, address);
docToAddress[docID] = address;
pool.copy(bytes);
address += bytes.length;
@ -97,15 +98,15 @@ class VarStraightBytesImpl {
datOut = getOrCreateDataOut();
boolean success = false;
try {
if (state.liveDocs == null && state.reader instanceof Reader) {
if (state.liveDocs == null && state.reader instanceof VarStraightReader) {
// bulk merge since we don't have any deletes
Reader reader = (Reader) state.reader;
VarStraightReader reader = (VarStraightReader) state.reader;
final int maxDocs = reader.maxDoc;
if (maxDocs == 0) {
return;
}
if (lastDocID+1 < state.docBase) {
fill(state.docBase);
fill(state.docBase, address);
lastDocID = state.docBase-1;
}
final long numDataBytes;
@ -147,13 +148,14 @@ class VarStraightBytesImpl {
}
@Override
protected void mergeDoc(int docID) throws IOException {
protected void mergeDoc(int docID, int sourceDoc) throws IOException {
assert merge;
assert lastDocID < docID;
currentMergeSource.getBytes(sourceDoc, bytesRef);
if (bytesRef.length == 0) {
return; // default
}
fill(docID);
fill(docID, address);
datOut.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
docToAddress[docID] = address;
address += bytesRef.length;
@ -186,20 +188,21 @@ class VarStraightBytesImpl {
try {
if (lastDocID == -1) {
idxOut.writeVLong(0);
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount+1,
PackedInts.bitsRequired(0));
for (int i = 0; i < docCount; i++) {
for (int i = 0; i < docCount+1; i++) {
w.add(0);
}
w.finish();
} else {
fill(docCount);
fill(docCount, address);
idxOut.writeVLong(address);
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount+1,
PackedInts.bitsRequired(address));
for (int i = 0; i < docCount; i++) {
w.add(docToAddress[i]);
}
w.add(address);
w.finish();
}
success = true;
@ -220,115 +223,59 @@ class VarStraightBytesImpl {
}
}
public static class Reader extends BytesReaderBase {
public static class VarStraightReader extends BytesReaderBase {
private final int maxDoc;
Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_START, true, context);
VarStraightReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_START, true, context, ValueType.BYTES_VAR_STRAIGHT);
this.maxDoc = maxDoc;
}
@Override
public Source load() throws IOException {
return new Source(cloneData(), cloneIndex());
}
private class Source extends DerefBytesSourceBase {
public Source(IndexInput datIn, IndexInput idxIn) throws IOException {
super(datIn, idxIn, idxIn.readVLong(), ValueType.BYTES_VAR_STRAIGHT);
}
@Override
public BytesRef getBytes(int docID, BytesRef bytesRef) {
final long address = addresses.get(docID);
final int length = docID == maxDoc - 1 ? (int) (totalLengthInBytes - address)
: (int) (addresses.get(1 + docID) - address);
return data.fillSlice(bytesRef, address, length);
}
@Override
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
return new SourceEnum(attrSource, type(), this, maxDoc()) {
@Override
public int advance(int target) throws IOException {
if (target >= numDocs) {
return pos = NO_MORE_DOCS;
}
source.getBytes(target, bytesRef);
return pos = target;
}
};
}
return new VarStraightSource(cloneData(), cloneIndex());
}
@Override
public ValuesEnum getEnum(AttributeSource source) throws IOException {
return new VarStraightBytesEnum(source, cloneData(), cloneIndex());
public Source getDirectSource()
throws IOException {
return new DirectVarStraightSource(cloneData(), cloneIndex(), type());
}
}
private static final class VarStraightSource extends BytesSourceBase {
private final PackedInts.Reader addresses;
private class VarStraightBytesEnum extends ValuesEnum {
private final PackedInts.ReaderIterator addresses;
private final IndexInput datIn;
private final IndexInput idxIn;
private final long fp;
private final long totBytes;
private int pos = -1;
private long nextAddress;
protected VarStraightBytesEnum(AttributeSource source, IndexInput datIn,
IndexInput idxIn) throws IOException {
super(source, ValueType.BYTES_VAR_STRAIGHT);
totBytes = idxIn.readVLong();
fp = datIn.getFilePointer();
addresses = PackedInts.getReaderIterator(idxIn);
this.datIn = datIn;
this.idxIn = idxIn;
nextAddress = addresses.next();
}
@Override
public void close() throws IOException {
datIn.close();
idxIn.close();
}
@Override
public int advance(final int target) throws IOException {
if (target >= maxDoc) {
return pos = NO_MORE_DOCS;
}
final long addr = pos+1 == target ? nextAddress : addresses.advance(target);
if (addr == totBytes) { // empty values at the end
bytesRef.length = 0;
bytesRef.offset = 0;
return pos = target;
}
datIn.seek(fp + addr);
final int size = (int) (target == maxDoc - 1 ? totBytes - addr
: (nextAddress = addresses.next()) - addr);
if (bytesRef.bytes.length < size) {
bytesRef.grow(size);
}
bytesRef.length = size;
datIn.readBytes(bytesRef.bytes, 0, size);
return pos = target;
}
@Override
public int docID() {
return pos;
}
@Override
public int nextDoc() throws IOException {
return advance(pos + 1);
}
public VarStraightSource(IndexInput datIn, IndexInput idxIn) throws IOException {
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), idxIn.readVLong(),
ValueType.BYTES_VAR_STRAIGHT);
addresses = PackedInts.getReader(idxIn);
}
@Override
public ValueType type() {
return ValueType.BYTES_VAR_STRAIGHT;
public BytesRef getBytes(int docID, BytesRef bytesRef) {
final long address = addresses.get(docID);
return data.fillSlice(bytesRef, address,
(int) (addresses.get(docID + 1) - address));
}
}
/**
* {@link DirectSource} for variable length, straight stored byte[] values
* that resolves each document's value by seeking in the data input on every
* lookup instead of loading all bytes up front.
* <p>
* NOTE(review): <code>data</code> and <code>baseOffset</code> are not declared
* in this class; presumably they are inherited from {@link DirectSource} -
* confirm.
*/
public final static class DirectVarStraightSource extends DirectSource {
// per-document start addresses into the data file, read lazily from the index input
private final PackedInts.RandomAccessReaderIterator index;
/**
* @param data
*          input positioned on the value bytes
* @param index
*          input positioned on the index header; note that this parameter
*          shadows the field of the same name inside the constructor
* @param type
*          the {@link ValueType} passed on to {@link DirectSource}
* @throws IOException
*           if reading the index header fails
*/
DirectVarStraightSource(IndexInput data, IndexInput index, ValueType type)
throws IOException {
super(data, type);
// skip the leading VLong; the writer in this file emits the total data
// length there, which is not needed for direct access
index.readVLong();
this.index = PackedInts.getRandomAccessReaderIterator(index);
}
/**
* Positions the data input at the start of the value for the given document
* and returns the value's length in bytes.
* <p>
* The length is the difference between the next address and this document's
* address. The writer in this file stores <code>docCount + 1</code>
* addresses, so a following entry exists for the last document too.
* NOTE(review): this relies on <code>index.next()</code> returning the entry
* directly after the one fetched by <code>index.get(docID)</code> - confirm
* against PackedInts.RandomAccessReaderIterator.
*/
@Override
protected int position(int docID) throws IOException {
final long offset = index.get(docID);
data.seek(baseOffset + offset);
// must be called right after get(docID): reads the start address of docID + 1
return (int) (index.next() - offset);
}
}
}

View File

@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.index.codecs.DocValuesConsumer;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Bits;
@ -39,7 +40,7 @@ import org.apache.lucene.util.Counter;
* @lucene.experimental
*/
public abstract class Writer extends DocValuesConsumer {
protected Source currentMergeSource;
/**
* Creates a new {@link Writer}.
*
@ -99,31 +100,32 @@ public abstract class Writer extends DocValuesConsumer {
}
/**
* Records a value from the given document id. The methods implementation
* obtains the value for the document id from the last {@link ValuesEnum}
* set to {@link #setNextEnum(ValuesEnum)}.
* Merges a document with the given <code>docID</code>. The method's
* implementation obtains the value for the <i>sourceDoc</i> id from the
* current {@link Source} set via <i>setNextMergeSource(Source)</i>.
* <p>
* This method is used during merging to provide implementation agnostic
* default merge implementation.
* </p>
* <p>
* The given document id must be the same document id returned from
* {@link ValuesEnum#docID()} when this method is called. All documents IDs
* between the given ID and the previously given ID or <tt>0</tt> if the
* method is call the first time are filled with default values depending on
* the {@link Writer} implementation. The given document ID must always be
* greater than the previous ID or <tt>0</tt> if called the first time.
* All document IDs between the given ID and the previously given ID or
* <tt>0</tt> if the method is called the first time are filled with default
* values depending on the {@link Writer} implementation. The given document
* ID must always be greater than the previous ID or <tt>0</tt> if called the
* first time.
*/
protected abstract void mergeDoc(int docID) throws IOException;
protected abstract void mergeDoc(int docID, int sourceDoc) throws IOException;
/**
* Sets the next {@link ValuesEnum} to consume values from on calls to
* {@link #mergeDoc(int)}
* Sets the next {@link Source} to consume values from on calls to
* {@link #mergeDoc(int, int)}
*
* @param valuesEnum
* the next {@link ValuesEnum}, this must not be null
* @param mergeSource
* the next {@link Source}, this must not be null
*/
protected abstract void setNextEnum(ValuesEnum valuesEnum);
protected void setNextMergeSource(Source mergeSource) {
  // Remember the source; it is the one values are consumed from on the
  // following mergeDoc(int, int) calls.
  this.currentMergeSource = mergeSource;
}
/**
* Finish writing and close any files and resources used by this Writer.
@ -141,34 +143,20 @@ public abstract class Writer extends DocValuesConsumer {
// simply override this and decide if they want to merge
// segments using this generic implementation or if a bulk merge is possible
// / feasible.
final ValuesEnum valEnum = state.reader.getEnum();
assert valEnum != null;
try {
setNextEnum(valEnum); // set the current enum we are working on - the
// impl. will get the correct reference for the type
// it supports
int docID = state.docBase;
final Bits liveDocs = state.liveDocs;
final int docCount = state.docCount;
int currentDocId;
if ((currentDocId = valEnum.advance(0)) != ValuesEnum.NO_MORE_DOCS) {
for (int i = 0; i < docCount; i++) {
if (liveDocs == null || liveDocs.get(i)) {
if (currentDocId < i) {
if ((currentDocId = valEnum.advance(i)) == ValuesEnum.NO_MORE_DOCS) {
break; // advance can jump over default values
}
}
if (currentDocId == i) { // we are on the doc to merge
mergeDoc(docID);
}
++docID;
}
}
final Source source = state.reader.getDirectSource();
assert source != null;
setNextMergeSource(source); // set the current enum we are working on - the
// impl. will get the correct reference for the type
// it supports
int docID = state.docBase;
final Bits liveDocs = state.liveDocs;
final int docCount = state.docCount;
for (int i = 0; i < docCount; i++) {
if (liveDocs == null || liveDocs.get(i)) {
mergeDoc(docID++, i);
}
} finally {
valEnum.close();
}
}
/**
@ -182,11 +170,6 @@ public abstract class Writer extends DocValuesConsumer {
* the file name id used to create files within the writer.
* @param directory
* the {@link Directory} to create the files from.
* @param comp
* a {@link BytesRef} comparator used for {@link Bytes} variants. If
* <code>null</code>
* {@link BytesRef#getUTF8SortedAsUnicodeComparator()} is used as the
* default.
* @param bytesUsed
* a byte-usage tracking reference
* @return a new {@link Writer} instance for the given {@link ValueType}
@ -205,28 +188,27 @@ public abstract class Writer extends DocValuesConsumer {
case VAR_INTS:
return Ints.getWriter(directory, id, bytesUsed, type, context);
case FLOAT_32:
return Floats.getWriter(directory, id, 4, bytesUsed, context);
return Floats.getWriter(directory, id, bytesUsed, context, type);
case FLOAT_64:
return Floats.getWriter(directory, id, 8, bytesUsed, context);
return Floats.getWriter(directory, id, bytesUsed, context, type);
case BYTES_FIXED_STRAIGHT:
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, true,
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, true, comp,
bytesUsed, context);
case BYTES_FIXED_DEREF:
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, true,
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, true, comp,
bytesUsed, context);
case BYTES_FIXED_SORTED:
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, true,
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, true, comp,
bytesUsed, context);
case BYTES_VAR_STRAIGHT:
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, false,
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, false, comp,
bytesUsed, context);
case BYTES_VAR_DEREF:
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, false,
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, false, comp,
bytesUsed, context);
case BYTES_VAR_SORTED:
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, false,
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, false, comp,
bytesUsed, context);
default:
throw new IllegalArgumentException("Unknown Values: " + type);
}

View File

@ -1,109 +0,0 @@
package org.apache.lucene.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Represents double[], as a slice (offset + length) into an existing double[].
 * <p>
 * Equality and hashing are both defined on the bit patterns reported by
 * {@link Double#doubleToLongBits(double)}, so that two slices that compare
 * equal always produce the same hash code and a slice holding <tt>NaN</tt> is
 * equal to itself.
 * </p>
 *
 * @lucene.internal
 */
public final class FloatsRef implements Cloneable {
  /** Backing array of the slice; stays <code>null</code> until one is assigned or allocated. */
  public double[] floats;
  /** Index of the first element of the slice inside {@link #floats}. */
  public int offset;
  /** Number of elements that belong to the slice. */
  public int length;

  /** Creates an empty reference without a backing array. */
  public FloatsRef() {
  }

  /**
   * Creates a reference backed by a fresh array of the given size; offset and
   * length are left at <tt>0</tt>.
   *
   * @param capacity
   *          the size of the backing array to allocate
   */
  public FloatsRef(int capacity) {
    floats = new double[capacity];
  }

  /**
   * Creates a reference onto the given array. The array is referenced, not
   * copied.
   *
   * @param floats
   *          the backing array
   * @param offset
   *          index of the first element of the slice
   * @param length
   *          number of elements in the slice
   */
  public FloatsRef(double[] floats, int offset, int length) {
    this.floats = floats;
    this.offset = offset;
    this.length = length;
  }

  /**
   * Creates a reference holding a copy of the other slice's content, see
   * {@link #copy(FloatsRef)}.
   *
   * @param other
   *          the slice to copy, must not be <code>null</code>
   */
  public FloatsRef(FloatsRef other) {
    copy(other);
  }

  /** Stores the given value at the first position of the slice. */
  public void set(double value) {
    floats[offset] = value;
  }

  /** Returns the value at the first position of the slice. */
  public double get() {
    return floats[offset];
  }

  /** Returns a new {@link FloatsRef} holding a copy of this slice's content. */
  @Override
  public Object clone() {
    return new FloatsRef(this);
  }

  @Override
  public int hashCode() {
    final int prime = 31;
    int result = 0;
    final int end = offset + length;
    for (int i = offset; i < end; i++) {
      final long bits = Double.doubleToLongBits(floats[i]);
      result = prime * result + (int) (bits ^ (bits >>> 32));
    }
    return result;
  }

  @Override
  public boolean equals(Object other) {
    return other instanceof FloatsRef && this.floatsEquals((FloatsRef) other);
  }

  /**
   * Compares the content of this slice with the content of the other slice.
   * Elements are compared by their {@link Double#doubleToLongBits(double)}
   * representation, which is also what {@link #hashCode()} is computed from:
   * <tt>NaN</tt> matches <tt>NaN</tt>, while <tt>0.0</tt> and <tt>-0.0</tt>
   * are different values.
   *
   * @param other
   *          the slice to compare with, must not be <code>null</code>
   * @return <code>true</code> iff both slices have the same length and the
   *         same elements in the same order
   */
  public boolean floatsEquals(FloatsRef other) {
    if (length != other.length) {
      return false;
    }
    final double[] otherFloats = other.floats;
    final int end = offset + length;
    int otherUpto = other.offset;
    for (int upto = offset; upto < end; upto++, otherUpto++) {
      // A plain != would treat 0.0 and -0.0 as equal although they hash
      // differently, and would make a slice containing NaN unequal to itself.
      if (Double.doubleToLongBits(floats[upto]) != Double
          .doubleToLongBits(otherFloats[otherUpto])) {
        return false;
      }
    }
    return true;
  }

  /**
   * Copies the content of the other slice into this reference. Afterwards the
   * offset of this reference is <tt>0</tt> and its length equals the length
   * of the other slice.
   *
   * @param other
   *          the slice to copy from, must not be <code>null</code>
   */
  public void copy(FloatsRef other) {
    if (floats == null) {
      floats = new double[other.length];
    } else {
      // presumably yields an array holding at least other.length elements
      floats = ArrayUtil.grow(floats, other.length);
    }
    System.arraycopy(other.floats, other.offset, floats, 0, other.length);
    length = other.length;
    offset = 0;
  }

  /**
   * Makes sure the backing array has room for at least the given number of
   * elements. A missing backing array is allocated instead of failing with a
   * {@link NullPointerException}, mirroring what {@link #copy(FloatsRef)}
   * does.
   *
   * @param newLength
   *          the minimum size of the backing array
   */
  public void grow(int newLength) {
    if (floats == null) {
      floats = new double[newLength];
    } else if (floats.length < newLength) {
      floats = ArrayUtil.grow(floats, newLength);
    }
  }
}

View File

@ -1,109 +0,0 @@
package org.apache.lucene.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Represents long[], as a slice (offset + length) into an existing long[].
 *
 * @lucene.internal
 */
public final class LongsRef implements Cloneable {
  /** Backing array of the slice; stays <code>null</code> until one is assigned or allocated. */
  public long[] ints;
  /** Index of the first element of the slice inside {@link #ints}. */
  public int offset;
  /** Number of elements that belong to the slice. */
  public int length;

  /** Creates an empty reference without a backing array. */
  public LongsRef() {
  }

  /**
   * Creates a reference backed by a fresh array of the given size; offset and
   * length are left at <tt>0</tt>.
   *
   * @param capacity
   *          the size of the backing array to allocate
   */
  public LongsRef(int capacity) {
    ints = new long[capacity];
  }

  /**
   * Creates a reference onto the given array. The array is referenced, not
   * copied.
   *
   * @param ints
   *          the backing array
   * @param offset
   *          index of the first element of the slice
   * @param length
   *          number of elements in the slice
   */
  public LongsRef(long[] ints, int offset, int length) {
    this.ints = ints;
    this.offset = offset;
    this.length = length;
  }

  /**
   * Creates a reference holding a copy of the other slice's content, see
   * {@link #copy(LongsRef)}.
   *
   * @param other
   *          the slice to copy, must not be <code>null</code>
   */
  public LongsRef(LongsRef other) {
    copy(other);
  }

  /** Returns a new {@link LongsRef} holding a copy of this slice's content. */
  @Override
  public Object clone() {
    return new LongsRef(this);
  }

  /** Stores the given value at the first position of the slice. */
  public void set(long value) {
    ints[offset] = value;
  }

  /** Returns the value at the first position of the slice. */
  public long get() {
    return ints[offset];
  }

  @Override
  public int hashCode() {
    final int prime = 31;
    int result = 0;
    final int end = offset + length;
    for (int i = offset; i < end; i++) {
      final long value = ints[i];
      result = prime * result + (int) (value ^ (value >>> 32));
    }
    return result;
  }

  /**
   * Returns <code>true</code> iff the other object is a {@link LongsRef} with
   * the same content, see {@link #intsEquals(LongsRef)}. Any other object,
   * including <code>null</code>, yields <code>false</code>.
   */
  @Override
  public boolean equals(Object other) {
    // Guard the cast: equals must answer false for null and foreign types
    // instead of throwing.
    return other instanceof LongsRef && this.intsEquals((LongsRef) other);
  }

  /**
   * Compares the content of this slice with the content of the other slice.
   *
   * @param other
   *          the slice to compare with, must not be <code>null</code>
   * @return <code>true</code> iff both slices have the same length and the
   *         same elements in the same order
   */
  public boolean intsEquals(LongsRef other) {
    if (length != other.length) {
      return false;
    }
    final long[] otherInts = other.ints;
    final int end = offset + length;
    int otherUpto = other.offset;
    for (int upto = offset; upto < end; upto++, otherUpto++) {
      if (ints[upto] != otherInts[otherUpto]) {
        return false;
      }
    }
    return true;
  }

  /**
   * Copies the content of the other slice into this reference. Afterwards the
   * offset of this reference is <tt>0</tt> and its length equals the length
   * of the other slice.
   *
   * @param other
   *          the slice to copy from, must not be <code>null</code>
   */
  public void copy(LongsRef other) {
    if (ints == null) {
      ints = new long[other.length];
    } else {
      // presumably yields an array holding at least other.length elements
      ints = ArrayUtil.grow(ints, other.length);
    }
    System.arraycopy(other.ints, other.offset, ints, 0, other.length);
    length = other.length;
    offset = 0;
  }

  /**
   * Makes sure the backing array has room for at least the given number of
   * elements. A missing backing array is allocated instead of failing with a
   * {@link NullPointerException}, mirroring what {@link #copy(LongsRef)}
   * does.
   *
   * @param newLength
   *          the minimum size of the backing array
   */
  public void grow(int newLength) {
    if (ints == null) {
      ints = new long[newLength];
    } else if (ints.length < newLength) {
      ints = ArrayUtil.grow(ints, newLength);
    }
  }
}

View File

@ -85,6 +85,14 @@ public class PackedInts {
long advance(int ord) throws IOException;
}
/**
 * A {@link ReaderIterator} that can additionally return the value stored at
 * an arbitrary position via {@link #get(int)}.
 */
public static interface RandomAccessReaderIterator extends ReaderIterator {
/**
 * Returns the value stored at the given position.
 *
 * @param index the position of the wanted value.
 * @return the value at the stated index.
 * @throws IOException if the value could not be read.
 */
long get(int index) throws IOException;
}
/**
* A packed integer array that can be modified.
* @lucene.internal
@ -195,6 +203,17 @@ public class PackedInts {
* @lucene.internal
*/
public static ReaderIterator getReaderIterator(IndexInput in) throws IOException {
return getRandomAccessReaderIterator(in);
}
/**
* Retrieve PackedInts as a {@link RandomAccessReaderIterator}
* @param in positioned at the beginning of a stored packed int structure.
* @return an iterator to access the values
* @throws IOException if the structure could not be retrieved.
* @lucene.internal
*/
public static RandomAccessReaderIterator getRandomAccessReaderIterator(IndexInput in) throws IOException {
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;

View File

@ -21,13 +21,15 @@ import org.apache.lucene.store.IndexInput;
import java.io.IOException;
final class PackedReaderIterator implements PackedInts.ReaderIterator {
final class PackedReaderIterator implements PackedInts.RandomAccessReaderIterator {
private long pending;
private int pendingBitsLeft;
private final IndexInput in;
private final int bitsPerValue;
private final int valueCount;
private int position = -1;
private long currentValue;
private final long startPointer;
// masks[n-1] masks for bottom n bits
private final long[] masks;
@ -39,6 +41,7 @@ final class PackedReaderIterator implements PackedInts.ReaderIterator {
this.bitsPerValue = bitsPerValue;
this.in = in;
startPointer = in.getFilePointer();
masks = new long[bitsPerValue];
long v = 1;
@ -76,7 +79,7 @@ final class PackedReaderIterator implements PackedInts.ReaderIterator {
}
++position;
return result;
return currentValue = result;
}
public void close() throws IOException {
@ -106,6 +109,26 @@ final class PackedReaderIterator implements PackedInts.ReaderIterator {
pendingBitsLeft = 64 - (int)(skip % 64);
}
position = ord-1;
return next();
return currentValue = next();
}
/**
 * Returns the value at the given index, independent of the position the
 * iterator currently stands on.
 * <ul>
 * <li>An index equal to the current position returns the value remembered
 * from the last read.</li>
 * <li>An index behind the current position re-positions the input and decodes
 * the value from there.</li>
 * <li>An index ahead of the current position is delegated to
 * {@link #advance(int)}.</li>
 * </ul>
 *
 * @param index
 *          the position of the wanted value, must be less than the value
 *          count
 * @return the value stored at the given index
 * @throws IOException
 *           if seeking or reading the underlying input fails
 */
@Override
public long get(int index) throws IOException {
  assert index < valueCount : "ord must be less than valueCount";
  if (index == position) {
    // the value of the current position is cached by next() / advance()
    return currentValue;
  }
  if (index > position) {
    return advance(index);
  }
  // Going backwards: drop whatever is buffered and re-read the 64 bit block
  // that contains the first bit of the requested value.
  pendingBitsLeft = 0;
  final long bitOffset = ((long) bitsPerValue) * (long) index;
  // block number (bitOffset / 64) converted into a byte offset (* 8)
  final long blockStartByte = (bitOffset >> 6) << 3;
  in.seek(startPointer + blockStartByte);
  pending = in.readLong();
  pendingBitsLeft = 64 - (int) (bitOffset % 64);
  // next() increments the position before returning, so step one back
  position = index - 1;
  final long value = next();
  currentValue = value;
  return value;
}
}

View File

@ -176,8 +176,8 @@ public class RandomIndexWriter implements Closeable {
IndexDocValuesField docValuesField = new IndexDocValuesField(name);
switch (type) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
case BYTES_FIXED_SORTED:
final String randomUnicodeString = _TestUtil.randomUnicodeString(random, fixedBytesLength);
BytesRef fixedRef = new BytesRef(randomUnicodeString);
if (fixedRef.length > fixedBytesLength) {
@ -189,8 +189,8 @@ public class RandomIndexWriter implements Closeable {
docValuesField.setBytes(fixedRef, type);
break;
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
case BYTES_VAR_SORTED:
BytesRef ref = new BytesRef(_TestUtil.randomUnicodeString(random, 200));
docValuesField.setBytes(ref, type);
break;

View File

@ -25,14 +25,12 @@ import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.FloatsRef;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
public class TestDocValues extends LuceneTestCase {
private static final Comparator<BytesRef> COMP = BytesRef.getUTF8SortedAsUnicodeComparator();
// TODO -- for sorted test, do our own Sort of the
// values and verify it's identical
@ -45,23 +43,20 @@ public class TestDocValues extends LuceneTestCase {
runTestBytes(Bytes.Mode.DEREF, true);
runTestBytes(Bytes.Mode.DEREF, false);
}
public void testBytesSorted() throws IOException {
runTestBytes(Bytes.Mode.SORTED, true);
runTestBytes(Bytes.Mode.SORTED, false);
}
public void runTestBytes(final Bytes.Mode mode, final boolean fixedSize)
throws IOException {
final BytesRef bytesRef = new BytesRef();
final Comparator<BytesRef> comp = mode == Bytes.Mode.SORTED ? BytesRef
.getUTF8SortedAsUnicodeComparator() : null;
Directory dir = newDirectory();
final Counter trackBytes = Counter.newCounter();
Writer w = Bytes.getWriter(dir, "test", mode, comp, fixedSize, trackBytes, newIOContext(random));
Writer w = Bytes.getWriter(dir, "test", mode, fixedSize, COMP, trackBytes, newIOContext(random));
int maxDoc = 220;
final String[] values = new String[maxDoc];
final int fixedLength = 1 + atLeast(50);
@ -81,24 +76,7 @@ public class TestDocValues extends LuceneTestCase {
w.finish(maxDoc);
assertEquals(0, trackBytes.get());
IndexDocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc, comp, newIOContext(random));
for (int iter = 0; iter < 2; iter++) {
ValuesEnum bytesEnum = getEnum(r);
assertNotNull("enum is null", bytesEnum);
BytesRef ref = bytesEnum.bytes();
for (int i = 0; i < 2; i++) {
final int idx = 2 * i;
assertEquals("doc: " + idx, idx, bytesEnum.advance(idx));
String utf8String = ref.utf8ToString();
assertEquals("doc: " + idx + " lenLeft: " + values[idx].length()
+ " lenRight: " + utf8String.length(), values[idx], utf8String);
}
assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc));
assertEquals(ValuesEnum.NO_MORE_DOCS, bytesEnum.advance(maxDoc + 1));
bytesEnum.close();
}
IndexDocValues r = Bytes.getValues(dir, "test", mode, fixedSize, maxDoc, COMP, newIOContext(random));
// Verify we can load source twice:
for (int iter = 0; iter < 2; iter++) {
@ -106,7 +84,7 @@ public class TestDocValues extends LuceneTestCase {
IndexDocValues.SortedSource ss;
if (mode == Bytes.Mode.SORTED) {
// default is unicode so we can simply pass null here
s = ss = getSortedSource(r, random.nextBoolean() ? comp : null);
s = ss = getSortedSource(r);
} else {
s = getSource(r);
ss = null;
@ -121,7 +99,7 @@ public class TestDocValues extends LuceneTestCase {
assertEquals("doc " + idx, values[idx], ss.getByOrd(ss.ord(idx),
bytesRef).utf8ToString());
int ord = ss
.getByValue(new BytesRef(values[idx]));
.getByValue(new BytesRef(values[idx]), new BytesRef());
assertTrue(ord >= 0);
assertEquals(ss.ord(idx), ord);
}
@ -129,10 +107,10 @@ public class TestDocValues extends LuceneTestCase {
// Lookup random strings:
if (mode == Bytes.Mode.SORTED) {
final int numValues = ss.getValueCount();
final int valueCount = ss.getValueCount();
for (int i = 0; i < 1000; i++) {
BytesRef bytesValue = new BytesRef(_TestUtil.randomFixedByteLengthUnicodeString(random, fixedSize? fixedLength : 1 + random.nextInt(39)));
int ord = ss.getByValue(bytesValue);
int ord = ss.getByValue(bytesValue, new BytesRef());
if (ord >= 0) {
assertTrue(bytesValue
.bytesEquals(ss.getByOrd(ord, bytesRef)));
@ -151,22 +129,23 @@ public class TestDocValues extends LuceneTestCase {
final BytesRef firstRef = ss.getByOrd(1, bytesRef);
// random string was before our first
assertTrue(firstRef.compareTo(bytesValue) > 0);
} else if (insertIndex == numValues) {
final BytesRef lastRef = ss.getByOrd(numValues-1, bytesRef);
} else if (insertIndex == valueCount) {
final BytesRef lastRef = ss.getByOrd(valueCount-1, bytesRef);
// random string was after our last
assertTrue(lastRef.compareTo(bytesValue) < 0);
} else {
final BytesRef before = (BytesRef) ss.getByOrd(insertIndex-1, bytesRef)
.clone();
BytesRef after = ss.getByOrd(insertIndex, bytesRef);
assertTrue(comp.compare(before, bytesValue) < 0);
assertTrue(comp.compare(bytesValue, after) < 0);
assertTrue(COMP.compare(before, bytesValue) < 0);
assertTrue(COMP.compare(bytesValue, after) < 0);
}
}
}
}
}
r.close();
dir.close();
}
@ -194,14 +173,6 @@ public class TestDocValues extends LuceneTestCase {
expectedTypes[i], source.type());
assertEquals(minMax[i][0], source.getInt(0));
assertEquals(minMax[i][1], source.getInt(1));
ValuesEnum iEnum = getEnum(r);
assertEquals(i + " with min: " + minMax[i][0] + " max: " + minMax[i][1],
expectedTypes[i], iEnum.type());
assertEquals(0, iEnum.nextDoc());
assertEquals(minMax[i][0], iEnum.intsRef.get());
assertEquals(1, iEnum.nextDoc());
assertEquals(minMax[i][1], iEnum.intsRef.get());
assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.nextDoc());
r.close();
dir.close();
@ -308,12 +279,12 @@ public class TestDocValues extends LuceneTestCase {
float[] sourceArray = new float[] {1,2,3};
Directory dir = newDirectory();
final Counter trackBytes = Counter.newCounter();
Writer w = Floats.getWriter(dir, "test", 4, trackBytes, newIOContext(random));
Writer w = Floats.getWriter(dir, "test", trackBytes, newIOContext(random), ValueType.FLOAT_32);
for (int i = 0; i < sourceArray.length; i++) {
w.add(i, sourceArray[i]);
}
w.finish(sourceArray.length);
IndexDocValues r = Floats.getValues(dir, "test", 3, newIOContext(random));
IndexDocValues r = Floats.getValues(dir, "test", 3, newIOContext(random), ValueType.FLOAT_32);
Source source = r.getSource();
assertTrue(source.hasArray());
float[] loaded = ((float[])source.getArray());
@ -329,12 +300,12 @@ public class TestDocValues extends LuceneTestCase {
double[] sourceArray = new double[] {1,2,3};
Directory dir = newDirectory();
final Counter trackBytes = Counter.newCounter();
Writer w = Floats.getWriter(dir, "test", 8, trackBytes, newIOContext(random));
Writer w = Floats.getWriter(dir, "test", trackBytes, newIOContext(random), ValueType.FLOAT_64);
for (int i = 0; i < sourceArray.length; i++) {
w.add(i, sourceArray[i]);
}
w.finish(sourceArray.length);
IndexDocValues r = Floats.getValues(dir, "test", 3, newIOContext(random));
IndexDocValues r = Floats.getValues(dir, "test", 3, newIOContext(random), ValueType.FLOAT_64);
Source source = r.getSource();
assertTrue(source.hasArray());
double[] loaded = ((double[])source.getArray());
@ -373,54 +344,23 @@ public class TestDocValues extends LuceneTestCase {
}
}
for (int iter = 0; iter < 2; iter++) {
ValuesEnum iEnum = getEnum(r);
assertEquals(type, iEnum.type());
LongsRef ints = iEnum.getInt();
for (int i = 0; i < NUM_VALUES + additionalDocs; i++) {
assertEquals(i, iEnum.nextDoc());
if (i < NUM_VALUES) {
assertEquals(values[i], ints.get());
} else {
assertEquals(0, ints.get());
}
}
assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.nextDoc());
iEnum.close();
}
for (int iter = 0; iter < 2; iter++) {
ValuesEnum iEnum = getEnum(r);
assertEquals(type, iEnum.type());
LongsRef ints = iEnum.getInt();
for (int i = 0; i < NUM_VALUES + additionalDocs; i += 1 + random.nextInt(25)) {
assertEquals(i, iEnum.advance(i));
if (i < NUM_VALUES) {
assertEquals(values[i], ints.get());
} else {
assertEquals(0, ints.get());
}
}
assertEquals(ValuesEnum.NO_MORE_DOCS, iEnum.advance(NUM_VALUES + additionalDocs));
iEnum.close();
}
r.close();
dir.close();
}
}
public void testFloats4() throws IOException {
runTestFloats(4, 0.00001);
runTestFloats(ValueType.FLOAT_32, 0.00001);
}
private void runTestFloats(int precision, double delta) throws IOException {
private void runTestFloats(ValueType type, double delta) throws IOException {
Directory dir = newDirectory();
final Counter trackBytes = Counter.newCounter();
Writer w = Floats.getWriter(dir, "test", precision, trackBytes, newIOContext(random));
Writer w = Floats.getWriter(dir, "test", trackBytes, newIOContext(random), type);
final int NUM_VALUES = 777 + random.nextInt(777);;
final double[] values = new double[NUM_VALUES];
for (int i = 0; i < NUM_VALUES; i++) {
final double v = precision == 4 ? random.nextFloat() : random
final double v = type == ValueType.FLOAT_32 ? random.nextFloat() : random
.nextDouble();
values[i] = v;
w.add(i, v);
@ -429,64 +369,38 @@ public class TestDocValues extends LuceneTestCase {
w.finish(NUM_VALUES + additionalValues);
assertEquals(0, trackBytes.get());
IndexDocValues r = Floats.getValues(dir, "test", NUM_VALUES + additionalValues, newIOContext(random));
IndexDocValues r = Floats.getValues(dir, "test", NUM_VALUES + additionalValues, newIOContext(random), type);
for (int iter = 0; iter < 2; iter++) {
Source s = getSource(r);
for (int i = 0; i < NUM_VALUES; i++) {
assertEquals(values[i], s.getFloat(i), 0.0f);
assertEquals("" + i, values[i], s.getFloat(i), 0.0f);
}
}
for (int iter = 0; iter < 2; iter++) {
ValuesEnum fEnum = getEnum(r);
FloatsRef floats = fEnum.getFloat();
for (int i = 0; i < NUM_VALUES + additionalValues; i++) {
assertEquals(i, fEnum.nextDoc());
if (i < NUM_VALUES) {
assertEquals(values[i], floats.get(), delta);
} else {
assertEquals(0.0d, floats.get(), delta);
}
}
assertEquals(ValuesEnum.NO_MORE_DOCS, fEnum.nextDoc());
fEnum.close();
}
for (int iter = 0; iter < 2; iter++) {
ValuesEnum fEnum = getEnum(r);
FloatsRef floats = fEnum.getFloat();
for (int i = 0; i < NUM_VALUES + additionalValues; i += 1 + random.nextInt(25)) {
assertEquals(i, fEnum.advance(i));
if (i < NUM_VALUES) {
assertEquals(values[i], floats.get(), delta);
} else {
assertEquals(0.0d, floats.get(), delta);
}
}
assertEquals(ValuesEnum.NO_MORE_DOCS, fEnum.advance(NUM_VALUES + additionalValues));
fEnum.close();
}
r.close();
dir.close();
}
public void testFloats8() throws IOException {
runTestFloats(8, 0.0);
runTestFloats(ValueType.FLOAT_64, 0.0);
}
private ValuesEnum getEnum(IndexDocValues values) throws IOException {
return random.nextBoolean() ? values.getEnum() : getSource(values).getEnum();
}
private Source getSource(IndexDocValues values) throws IOException {
// getSource uses cache internally
return random.nextBoolean() ? values.load() : values.getSource();
switch(random.nextInt(5)) {
case 3:
return values.load();
case 2:
return values.getDirectSource();
case 1:
return values.getSource();
default:
return values.getSource();
}
}
private SortedSource getSortedSource(IndexDocValues values,
Comparator<BytesRef> comparator) throws IOException {
// getSortedSource uses cache internally
return random.nextBoolean() ? values.loadSorted(comparator) : values
.getSortedSorted(comparator);
private SortedSource getSortedSource(IndexDocValues values) throws IOException {
return getSource(values).asSortedSource();
}
}

View File

@ -47,8 +47,6 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.FloatsRef;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.junit.Before;
@ -136,7 +134,6 @@ public class TestDocValuesIndexing extends LuceneTestCase {
Collections.shuffle(values, random);
ValueType first = values.get(0);
ValueType second = values.get(1);
String msg = "[first=" + first.name() + ", second=" + second.name() + "]";
// index first index
Directory d_1 = newDirectory();
IndexWriter w_1 = new IndexWriter(d_1, writerConfig(random.nextBoolean()));
@ -171,36 +168,66 @@ public class TestDocValuesIndexing extends LuceneTestCase {
// check values
IndexReader merged = IndexReader.open(w, true);
ValuesEnum vE_1 = getValuesEnum(getDocValues(r_1, first.name()));
ValuesEnum vE_2 = getValuesEnum(getDocValues(r_2, second.name()));
ValuesEnum vE_1_merged = getValuesEnum(getDocValues(merged, first.name()));
ValuesEnum vE_2_merged = getValuesEnum(getDocValues(merged, second
Source source_1 = getSource(getDocValues(r_1, first.name()));
Source source_2 = getSource(getDocValues(r_2, second.name()));
Source source_1_merged = getSource(getDocValues(merged, first.name()));
Source source_2_merged = getSource(getDocValues(merged, second
.name()));
switch (second) { // these variants don't advance over missing values
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_STRAIGHT:
case FLOAT_32:
case FLOAT_64:
case VAR_INTS:
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
assertEquals(msg, valuesPerIndex-1, vE_2_merged.advance(valuesPerIndex-1));
for (int i = 0; i < r_1.maxDoc(); i++) {
switch (first) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_STRAIGHT:
case BYTES_FIXED_SORTED:
case BYTES_VAR_SORTED:
assertEquals(source_1.getBytes(i, new BytesRef()),
source_1_merged.getBytes(i, new BytesRef()));
break;
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
case VAR_INTS:
assertEquals(source_1.getInt(i), source_1_merged.getInt(i));
break;
case FLOAT_32:
case FLOAT_64:
assertEquals(source_1.getFloat(i), source_1_merged.getFloat(i), 0.0d);
break;
default:
fail("unkonwn " + first);
}
}
for (int i = 0; i < valuesPerIndex; i++) {
assertEquals(msg, i, vE_1.nextDoc());
assertEquals(msg, i, vE_1_merged.nextDoc());
assertEquals(msg, i, vE_2.nextDoc());
assertEquals(msg, i + valuesPerIndex, vE_2_merged.nextDoc());
for (int i = r_1.maxDoc(); i < merged.maxDoc(); i++) {
switch (second) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_STRAIGHT:
case BYTES_FIXED_SORTED:
case BYTES_VAR_SORTED:
assertEquals(source_2.getBytes(i - r_1.maxDoc(), new BytesRef()),
source_2_merged.getBytes(i, new BytesRef()));
break;
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
case VAR_INTS:
assertEquals(source_2.getInt(i - r_1.maxDoc()),
source_2_merged.getInt(i));
break;
case FLOAT_32:
case FLOAT_64:
assertEquals(source_2.getFloat(i - r_1.maxDoc()),
source_2_merged.getFloat(i), 0.0d);
break;
default:
fail("unkonwn " + first);
}
}
assertEquals(msg, ValuesEnum.NO_MORE_DOCS, vE_1.nextDoc());
assertEquals(msg, ValuesEnum.NO_MORE_DOCS, vE_2.nextDoc());
assertEquals(msg, ValuesEnum.NO_MORE_DOCS, vE_1_merged.advance(valuesPerIndex*2));
assertEquals(msg, ValuesEnum.NO_MORE_DOCS, vE_2_merged.nextDoc());
// close resources
r_1.close();
r_2.close();
@ -260,22 +287,12 @@ public class TestDocValuesIndexing extends LuceneTestCase {
assertEquals("index " + i, 0, value);
}
ValuesEnum intsEnum = getValuesEnum(intsReader);
assertTrue(intsEnum.advance(base) >= base);
intsEnum = getValuesEnum(intsReader);
LongsRef enumRef = intsEnum.getInt();
int expected = 0;
for (int i = base; i < r.numDocs(); i++, expected++) {
while (deleted.get(expected)) {
expected++;
}
assertEquals("advance failed at index: " + i + " of " + r.numDocs()
+ " docs", i, intsEnum.advance(i));
assertEquals(val + " mod: " + mod + " index: " + i, expected%mod, ints.getInt(i));
assertEquals(expected%mod, enumRef.get());
}
}
break;
@ -289,20 +306,11 @@ public class TestDocValuesIndexing extends LuceneTestCase {
assertEquals(val + " failed for doc: " + i + " base: " + base,
0.0d, value, 0.0d);
}
ValuesEnum floatEnum = getValuesEnum(floatReader);
assertTrue(floatEnum.advance(base) >= base);
floatEnum = getValuesEnum(floatReader);
FloatsRef enumRef = floatEnum.getFloat();
int expected = 0;
for (int i = base; i < r.numDocs(); i++, expected++) {
while (deleted.get(expected)) {
expected++;
}
assertEquals("advance failed at index: " + i + " of " + r.numDocs()
+ " docs base:" + base, i, floatEnum.advance(i));
assertEquals(floatEnum.getClass() + " index " + i, 2.0 * expected,
enumRef.get(), 0.00001);
assertEquals("index " + i, 2.0 * expected, floats.getFloat(i),
0.00001);
}
@ -320,7 +328,7 @@ public class TestDocValuesIndexing extends LuceneTestCase {
w.close();
d.close();
}
public void runTestIndexBytes(IndexWriterConfig cfg, boolean withDeletions)
throws CorruptIndexException, LockObtainFailedException, IOException {
final Directory d = newDirectory();
@ -353,6 +361,8 @@ public class TestDocValuesIndexing extends LuceneTestCase {
switch (byteIndexValue) {
case BYTES_VAR_STRAIGHT:
case BYTES_FIXED_STRAIGHT:
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
// fixed straight returns bytesref with zero bytes all of fixed
// length
assertNotNull("expected none null - " + msg, br);
@ -365,23 +375,13 @@ public class TestDocValuesIndexing extends LuceneTestCase {
}
}
break;
case BYTES_VAR_SORTED:
case BYTES_FIXED_SORTED:
case BYTES_VAR_DEREF:
case BYTES_FIXED_DEREF:
default:
assertNotNull("expected none null - " + msg, br);
assertEquals(0, br.length);
assertEquals(byteIndexValue + "", 0, br.length);
// make sure we advance at least until base
ValuesEnum bytesEnum = getValuesEnum(bytesReader);
final int advancedTo = bytesEnum.advance(0);
assertTrue(byteIndexValue.name() + " advanced failed base:" + base
+ " advancedTo: " + advancedTo, base <= advancedTo);
}
}
ValuesEnum bytesEnum = getValuesEnum(bytesReader);
final BytesRef enumRef = bytesEnum.bytes();
// test the actual doc values added in this iteration
assertEquals(base + numRemainingValues, r.numDocs());
int v = 0;
@ -393,17 +393,8 @@ public class TestDocValuesIndexing extends LuceneTestCase {
upto += bytesSize;
}
BytesRef br = bytes.getBytes(i, new BytesRef());
if (bytesEnum.docID() != i) {
assertEquals("seek failed for index " + i + " " + msg, i, bytesEnum
.advance(i));
}
assertTrue(msg, br.length > 0);
for (int j = 0; j < br.length; j++, upto++) {
assertTrue(" enumRef not initialized " + msg,
enumRef.bytes.length > 0);
assertEquals(
"EnumRef Byte at index " + j + " doesn't match - " + msg, upto,
enumRef.bytes[enumRef.offset + j]);
if (!(br.bytes.length > br.offset + j))
br = bytes.getBytes(i, new BytesRef());
assertTrue("BytesRef index exceeded [" + msg + "] offset: "
@ -446,33 +437,23 @@ public class TestDocValuesIndexing extends LuceneTestCase {
}
private Source getSource(IndexDocValues values) throws IOException {
Source source;
if (random.nextInt(10) == 0) {
source = values.load();
} else {
// getSource uses cache internally
source = values.getSource();
// getSource uses cache internally
switch(random.nextInt(5)) {
case 3:
return values.load();
case 2:
return values.getDirectSource();
case 1:
return values.getSource();
default:
return values.getSource();
}
assertNotNull(source);
return source;
}
private ValuesEnum getValuesEnum(IndexDocValues values) throws IOException {
ValuesEnum valuesEnum;
if (!(values instanceof MultiIndexDocValues) && random.nextInt(10) == 0) {
// TODO not supported by MultiDocValues yet!
valuesEnum = getSource(values).getEnum();
} else {
valuesEnum = values.getEnum();
}
assertNotNull(valuesEnum);
return valuesEnum;
}
private static EnumSet<ValueType> BYTES = EnumSet.of(ValueType.BYTES_FIXED_DEREF,
ValueType.BYTES_FIXED_SORTED, ValueType.BYTES_FIXED_STRAIGHT, ValueType.BYTES_VAR_DEREF,
ValueType.BYTES_VAR_SORTED, ValueType.BYTES_VAR_STRAIGHT);
ValueType.BYTES_FIXED_STRAIGHT, ValueType.BYTES_VAR_DEREF,
ValueType.BYTES_VAR_STRAIGHT, ValueType.BYTES_FIXED_SORTED, ValueType.BYTES_VAR_SORTED);
private static EnumSet<ValueType> NUMERICS = EnumSet.of(ValueType.VAR_INTS,
ValueType.FIXED_INTS_16, ValueType.FIXED_INTS_32,

View File

@ -115,12 +115,38 @@ public class TestPackedInts extends LuceneTestCase {
assertEquals(fp, in.getFilePointer());
in.close();
}
{ // test reader iterator get
IndexInput in = d.openInput("out.bin", newIOContext(random));
PackedInts.RandomAccessReaderIterator intsEnum = PackedInts.getRandomAccessReaderIterator(in);
for (int i = 0; i < valueCount; i++) {
final String msg = "index=" + i + " ceil=" + ceil + " valueCount="
+ valueCount + " nbits=" + nbits + " for "
+ intsEnum.getClass().getSimpleName();
final int ord = random.nextInt(valueCount);
long seek = intsEnum.get(ord);
assertEquals(msg, seek, values[ord]);
if (random.nextBoolean() && ord < valueCount-1) {
if (random.nextBoolean()) {
assertEquals(msg, values[ord+1], intsEnum.advance(ord+1));
} else {
assertEquals(msg, values[ord+1], intsEnum.next());
}
}
}
if (intsEnum.ord() < valueCount - 1)
assertEquals(values[valueCount - 1], intsEnum
.advance(valueCount - 1));
assertEquals(valueCount - 1, intsEnum.ord());
assertEquals(fp, in.getFilePointer());
in.close();
}
ceil *= 2;
d.close();
}
}
}
public void testControlledEquality() {
final int VALUE_COUNT = 255;
final int BITS_PER_VALUE = 8;