LUCENE-3467: Cut over numeric docvalues to fixed straight bytes

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1176906 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2011-09-28 14:52:30 +00:00
parent 42b419aa31
commit 534d8abed0
17 changed files with 1441 additions and 2116 deletions

View File

@ -154,7 +154,7 @@ public class DefaultDocValuesProducer extends PerDocValues {
case FIXED_INTS_64:
case FIXED_INTS_8:
case VAR_INTS:
return Ints.getValues(dir, id, docCount, context);
return Ints.getValues(dir, id, docCount, type, context);
case FLOAT_32:
return Floats.getValues(dir, id, docCount, context);
case FLOAT_64:

View File

@ -27,16 +27,25 @@ import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.values.IndexDocValues.SortedSource;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
import org.apache.lucene.util.packed.PackedInts;
/**
* Provides concrete Writer/Reader implementations for <tt>byte[]</tt> value per
@ -185,16 +194,18 @@ public final class Bytes {
}
// TODO open up this API?
static abstract class BytesBaseSource extends Source {
static abstract class BytesSourceBase extends Source {
private final PagedBytes pagedBytes;
private final ValueType type;
protected final IndexInput datIn;
protected final IndexInput idxIn;
protected final static int PAGED_BYTES_BITS = 15;
protected final PagedBytes.Reader data;
protected final long totalLengthInBytes;
protected BytesBaseSource(IndexInput datIn, IndexInput idxIn,
PagedBytes pagedBytes, long bytesToRead) throws IOException {
protected BytesSourceBase(IndexInput datIn, IndexInput idxIn,
PagedBytes pagedBytes, long bytesToRead, ValueType type) throws IOException {
assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: "
+ (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
this.datIn = datIn;
@ -203,6 +214,7 @@ public final class Bytes {
this.pagedBytes.copy(datIn, bytesToRead);
data = pagedBytes.freeze(true);
this.idxIn = idxIn;
this.type = type;
}
public void close() throws IOException {
@ -220,6 +232,17 @@ public final class Bytes {
}
}
}
@Override
public ValueType type() {
return type;
}
@Override
public int getValueCount() {
throw new UnsupportedOperationException();
}
/**
* Returns one greater than the largest possible document number.
@ -245,18 +268,46 @@ public final class Bytes {
}
}
static abstract class DerefBytesSourceBase extends BytesSourceBase {
protected final PackedInts.Reader addresses;
public DerefBytesSourceBase(IndexInput datIn, IndexInput idxIn, long bytesToRead, ValueType type) throws IOException {
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), bytesToRead, type);
addresses = PackedInts.getReader(idxIn);
}
@Override
public int getValueCount() {
return addresses.size();
}
@Override
protected int maxDoc() {
return addresses.size();
}
static abstract class BytesBaseSortedSource extends SortedSource {
}
static abstract class BytesSortedSourceBase extends SortedSource {
private final PagedBytes pagedBytes;
private final Comparator<BytesRef> comp;
protected final PackedInts.Reader docToOrdIndex;
private final ValueType type;
protected final IndexInput datIn;
protected final IndexInput idxIn;
protected final BytesRef defaultValue = new BytesRef();
protected final static int PAGED_BYTES_BITS = 15;
private final PagedBytes pagedBytes;
protected final PagedBytes.Reader data;
private final Comparator<BytesRef> comp;
protected BytesBaseSortedSource(IndexInput datIn, IndexInput idxIn,
Comparator<BytesRef> comp, PagedBytes pagedBytes, long bytesToRead)
protected BytesSortedSourceBase(IndexInput datIn, IndexInput idxIn,
Comparator<BytesRef> comp, long bytesToRead, ValueType type) throws IOException {
this(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), bytesToRead, type);
}
protected BytesSortedSourceBase(IndexInput datIn, IndexInput idxIn,
Comparator<BytesRef> comp, PagedBytes pagedBytes, long bytesToRead,ValueType type)
throws IOException {
assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: "
+ (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
@ -267,8 +318,15 @@ public final class Bytes {
this.idxIn = idxIn;
this.comp = comp == null ? BytesRef.getUTF8SortedAsUnicodeComparator()
: comp;
docToOrdIndex = PackedInts.getReader(idxIn);
this.type = type;
}
@Override
public int ord(int docID) {
return (int) docToOrdIndex.get(docID) -1;
}
@Override
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
@ -277,22 +335,15 @@ public final class Bytes {
}
protected void closeIndexInput() throws IOException {
try {
if (datIn != null) {
datIn.close();
}
} finally {
if (idxIn != null) {// if straight
idxIn.close();
}
}
IOUtils.close(datIn, idxIn);
}
/**
* Returns the largest doc id + 1 in this doc values source
*/
protected abstract int maxDoc();
public int maxDoc() {
return docToOrdIndex.size();
}
/**
* Copies the value for the given ord to the given {@link BytesRef} and
* returns it.
@ -336,6 +387,11 @@ public final class Bytes {
}
};
}
@Override
public ValueType type() {
return type;
}
}
// TODO: open up this API?!
@ -359,7 +415,7 @@ public final class Bytes {
this.context = context;
}
protected IndexOutput getDataOut() throws IOException {
protected IndexOutput getOrCreateDataOut() throws IOException {
if (datOut == null) {
boolean success = false;
try {
@ -375,8 +431,16 @@ public final class Bytes {
}
return datOut;
}
protected IndexOutput getIndexOut() {
return idxOut;
}
protected IndexOutput getDataOut() {
return datOut;
}
protected IndexOutput getIndexOut() throws IOException {
protected IndexOutput getOrCreateIndexOut() throws IOException {
boolean success = false;
try {
if (idxOut == null) {
@ -503,5 +567,223 @@ public final class Bytes {
}
}
}
/**
 * Base writer for fixed-size byte[] doc values that de-duplicates values via a
 * {@link BytesRefHash} and records, per document, a 1-based entry ordinal in
 * {@code docToEntry} (0 marks a document that has no value).
 */
static abstract class DerefBytesWriterBase extends BytesWriterBase {
  protected int size = -1;        // fixed length of every value in bytes; -1 until the first add()
  protected int[] docToEntry;     // docID -> (hash ord + 1); 0 == missing value
  protected final BytesRefHash hash; // de-dups values; ord is insertion order

  protected DerefBytesWriterBase(Directory dir, String id, String codecName,
      int codecVersion, Counter bytesUsed, IOContext context)
      throws IOException {
    this(dir, id, codecName, codecVersion, new DirectTrackingAllocator(
        ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed), bytesUsed, context);
  }

  protected DerefBytesWriterBase(Directory dir, String id, String codecName,
      int codecVersion, Allocator allocator, Counter bytesUsed, IOContext context)
      throws IOException {
    super(dir, id, codecName, codecVersion, bytesUsed, context);
    hash = new BytesRefHash(new ByteBlockPool(allocator),
        BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
            BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
    docToEntry = new int[1];
    // account for the initial one-slot docToEntry array
    bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
  }

  /**
   * Writes a 1- or 2-byte length prefix for {@code bytes}: lengths &lt; 128 use a
   * single byte; otherwise the high bit of the first byte flags the 2-byte form.
   *
   * @return the number of prefix bytes written (1 or 2)
   */
  protected static int writePrefixLength(DataOutput datOut, BytesRef bytes)
      throws IOException {
    if (bytes.length < 128) {
      datOut.writeByte((byte) bytes.length);
      return 1;
    } else {
      datOut.writeByte((byte) (0x80 | (bytes.length >> 8)));
      datOut.writeByte((byte) (bytes.length & 0xff));
      return 2;
    }
  }

  @Override
  public void add(int docID, BytesRef bytes) throws IOException {
    if (bytes.length == 0) { // default value - skip it
      return;
    }
    checkSize(bytes);
    int ord = hash.add(bytes);
    if (ord < 0) {
      ord = (-ord) - 1; // value already present; recover its existing ord
    }
    if (docID >= docToEntry.length) {
      // renamed from 'size' to avoid shadowing the value-length field above
      final int oldLength = docToEntry.length;
      docToEntry = ArrayUtil.grow(docToEntry, 1 + docID);
      bytesUsed.addAndGet((docToEntry.length - oldLength)
          * RamUsageEstimator.NUM_BYTES_INT);
    }
    docToEntry[docID] = 1 + ord; // stored 1-based so 0 means "no value"
  }

  /**
   * Records the fixed value length on the first call and rejects any later
   * value whose length differs.
   *
   * @throws IllegalArgumentException if {@code bytes.length} != the recorded size
   */
  protected void checkSize(BytesRef bytes) {
    if (size == -1) {
      size = bytes.length;
    } else if (bytes.length != size) {
      throw new IllegalArgumentException("expected bytes size=" + size
          + " but got " + bytes.length);
    }
  }

  // Important that we get docCount, in case there were
  // some last docs that we didn't see
  @Override
  public void finish(int docCount) throws IOException {
    boolean success = false;
    try {
      finishInternal(docCount);
      success = true;
    } finally {
      releaseResources();
      if (success) {
        IOUtils.close(getIndexOut(), getDataOut());
      } else {
        IOUtils.closeWhileHandlingException(getIndexOut(), getDataOut());
      }
    }
  }

  /** Subclass hook that writes the data and index files for {@code docCount} docs. */
  protected abstract void finishInternal(int docCount) throws IOException;

  /** Frees the hash and the docToEntry array and returns their RAM to the counter. */
  protected void releaseResources() {
    hash.close();
    bytesUsed
        .addAndGet((-docToEntry.length) * RamUsageEstimator.NUM_BYTES_INT);
    docToEntry = null;
  }

  /** Writes the docID -> entry index with no address indirection. */
  protected void writeIndex(IndexOutput idxOut, int docCount,
      long maxValue, int[] toEntry) throws IOException {
    writeIndex(idxOut, docCount, maxValue, (int[]) null, toEntry);
  }

  /**
   * Writes a packed docID -> value index. If {@code addresses} is non-null each
   * entry is remapped through it; documents beyond the last seen doc are
   * padded with 0 ("no value").
   */
  protected void writeIndex(IndexOutput idxOut, int docCount,
      long maxValue, int[] addresses, int[] toEntry) throws IOException {
    final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
        PackedInts.bitsRequired(maxValue));
    final int limit = docCount > docToEntry.length ? docToEntry.length
        : docCount;
    // fixed off-by-one: indices 0..limit-1 are read, so length must be >= limit
    assert toEntry.length >= limit;
    if (addresses != null) {
      for (int i = 0; i < limit; i++) {
        assert addresses[toEntry[i]] >= 0;
        w.add(addresses[toEntry[i]]);
      }
    } else {
      for (int i = 0; i < limit; i++) {
        assert toEntry[i] >= 0;
        w.add(toEntry[i]);
      }
    }
    for (int i = limit; i < docCount; i++) {
      w.add(0); // pad trailing docs with the "no value" sentinel
    }
    w.finish();
  }

  /** Same as the int[] overload but with 64-bit addresses. */
  protected void writeIndex(IndexOutput idxOut, int docCount,
      long maxValue, long[] addresses, int[] toEntry) throws IOException {
    final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
        PackedInts.bitsRequired(maxValue));
    final int limit = docCount > docToEntry.length ? docToEntry.length
        : docCount;
    // fixed off-by-one: indices 0..limit-1 are read, so length must be >= limit
    assert toEntry.length >= limit;
    if (addresses != null) {
      for (int i = 0; i < limit; i++) {
        assert addresses[toEntry[i]] >= 0;
        w.add(addresses[toEntry[i]]);
      }
    } else {
      for (int i = 0; i < limit; i++) {
        assert toEntry[i] >= 0;
        w.add(toEntry[i]);
      }
    }
    for (int i = limit; i < docCount; i++) {
      w.add(0); // pad trailing docs with the "no value" sentinel
    }
    w.finish();
  }
}
/**
 * Base enum over de-duplicated byte[] values: iterates the packed
 * docID -> address index and delegates the actual byte read to {@link #fill}.
 */
abstract static class DerefBytesEnumBase extends ValuesEnum {
// packed index iterator: per-doc addresses into the data file
private final PackedInts.ReaderIterator idx;
// total number of index slots (== idx.size())
private final int valueCount;
// current doc position; -1 before first advance, NO_MORE_DOCS when exhausted
private int pos = -1;
protected final IndexInput datIn;
// file pointer at the start of the value data, captured at construction
protected final long fp;
// fixed per-value length in bytes
protected final int size;
protected DerefBytesEnumBase(AttributeSource source, IndexInput datIn,
IndexInput idxIn, int size, ValueType enumType) throws IOException {
super(source, enumType);
this.datIn = datIn;
this.size = size;
idx = PackedInts.getReaderIterator(idxIn);
fp = datIn.getFilePointer();
if (size > 0) {
// pre-size the shared BytesRef once since every value has the same length
bytesRef.grow(this.size);
bytesRef.length = this.size;
}
bytesRef.offset = 0;
valueCount = idx.size();
}
// Adopts another enum's BytesRef (e.g. when wrapping), ensuring capacity for size bytes.
protected void copyFrom(ValuesEnum valuesEnum) {
bytesRef = valuesEnum.bytesRef;
if (bytesRef.bytes.length < size) {
bytesRef.grow(size);
}
bytesRef.length = size;
bytesRef.offset = 0;
}
@Override
public int advance(int target) throws IOException {
if (target < valueCount) {
long address;
// address 0 encodes "no value" for that doc; skip ahead to the next doc with one
while ((address = idx.advance(target)) == 0) {
if (++target >= valueCount) {
return pos = NO_MORE_DOCS;
}
}
pos = idx.ord();
fill(address, bytesRef);
return pos;
}
return pos = NO_MORE_DOCS;
}
@Override
public int nextDoc() throws IOException {
if (pos >= valueCount) {
return pos = NO_MORE_DOCS;
}
return advance(pos + 1);
}
// Closes data input and index iterator; finally-block ensures idx closes even if datIn throws.
public void close() throws IOException {
try {
datIn.close();
} finally {
idx.close();
}
}
// Reads the value stored at the given data-file address into ref.
protected abstract void fill(long address, BytesRef ref) throws IOException;
@Override
public int docID() {
return pos;
}
}
}

View File

@ -19,26 +19,17 @@ package org.apache.lucene.index.values;
import java.io.IOException;
import org.apache.lucene.index.values.Bytes.BytesBaseSource;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
import org.apache.lucene.index.values.Bytes.DerefBytesSourceBase;
import org.apache.lucene.index.values.Bytes.DerefBytesEnumBase;
import org.apache.lucene.index.values.Bytes.DerefBytesWriterBase;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
import org.apache.lucene.util.packed.PackedInts;
// Stores fixed-length byte[] by deref, ie when two docs
// have the same value, they store only 1 byte[]
@ -51,135 +42,55 @@ class FixedDerefBytesImpl {
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
static class Writer extends BytesWriterBase {
private int size = -1;
private int[] docToID;
private final BytesRefHash hash;
public static class Writer extends DerefBytesWriterBase {
public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
throws IOException {
this(dir, id, new DirectTrackingAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed),
bytesUsed, context);
}
public Writer(Directory dir, String id, Allocator allocator,
Counter bytesUsed, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
hash = new BytesRefHash(new ByteBlockPool(allocator),
BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
docToID = new int[1];
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
}
@Override
public void add(int docID, BytesRef bytes) throws IOException {
if (bytes.length == 0) // default value - skip it
return;
if (size == -1) {
size = bytes.length;
} else if (bytes.length != size) {
throw new IllegalArgumentException("expected bytes size=" + size
+ " but got " + bytes.length);
}
int ord = hash.add(bytes);
if (ord < 0) {
ord = (-ord) - 1;
}
if (docID >= docToID.length) {
final int size = docToID.length;
docToID = ArrayUtil.grow(docToID, 1 + docID);
bytesUsed.addAndGet((docToID.length - size)
* RamUsageEstimator.NUM_BYTES_INT);
}
docToID[docID] = 1 + ord;
}
// Important that we get docCount, in case there were
// some last docs that we didn't see
@Override
public void finish(int docCount) throws IOException {
boolean success = false;
protected void finishInternal(int docCount) throws IOException {
final int numValues = hash.size();
final IndexOutput datOut = getDataOut();
try {
datOut.writeInt(size);
if (size != -1) {
final BytesRef bytesRef = new BytesRef(size);
for (int i = 0; i < numValues; i++) {
hash.get(i, bytesRef);
datOut.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
}
final IndexOutput datOut = getOrCreateDataOut();
datOut.writeInt(size);
if (size != -1) {
final BytesRef bytesRef = new BytesRef(size);
for (int i = 0; i < numValues; i++) {
hash.get(i, bytesRef);
datOut.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
}
success = true;
} finally {
if (success) {
IOUtils.close(datOut);
} else {
IOUtils.closeWhileHandlingException(datOut);
}
hash.close();
}
success = false;
final IndexOutput idxOut = getIndexOut();
try {
final int count = 1 + numValues;
idxOut.writeInt(count - 1);
// write index
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
PackedInts.bitsRequired(count - 1));
final int limit = docCount > docToID.length ? docToID.length : docCount;
for (int i = 0; i < limit; i++) {
w.add(docToID[i]);
}
// fill up remaining doc with zeros
for (int i = limit; i < docCount; i++) {
w.add(0);
}
w.finish();
success = true;
} finally {
if (success) {
IOUtils.close(idxOut);
} else {
IOUtils.closeWhileHandlingException(idxOut);
}
bytesUsed
.addAndGet((-docToID.length) * RamUsageEstimator.NUM_BYTES_INT);
docToID = null;
}
final IndexOutput idxOut = getOrCreateIndexOut();
idxOut.writeInt(numValues);
writeIndex(idxOut, docCount, numValues, docToEntry);
}
}
public static class Reader extends BytesReaderBase {
private final int size;
private final int numValuesStored;
Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_START, true, context);
size = datIn.readInt();
numValuesStored = idxIn.readInt();
}
@Override
public Source load() throws IOException {
final IndexInput index = cloneIndex();
return new Source(cloneData(), index, size, index.readInt());
return new Source(cloneData(), cloneIndex(), size, numValuesStored);
}
private static class Source extends BytesBaseSource {
private final PackedInts.Reader index;
private static final class Source extends DerefBytesSourceBase {
private final int size;
private final int numValues;
protected Source(IndexInput datIn, IndexInput idxIn, int size,
int numValues) throws IOException {
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), size * numValues);
protected Source(IndexInput datIn, IndexInput idxIn, int size, long numValues) throws IOException {
super(datIn, idxIn, size * numValues, ValueType.BYTES_FIXED_DEREF);
this.size = size;
this.numValues = numValues;
index = PackedInts.getReader(idxIn);
}
@Override
public BytesRef getBytes(int docID, BytesRef bytesRef) {
final int id = (int) index.get(docID);
final int id = (int) addresses.get(docID);
if (id == 0) {
bytesRef.length = 0;
return bytesRef;
@ -187,95 +98,18 @@ class FixedDerefBytesImpl {
return data.fillSlice(bytesRef, ((id - 1) * size), size);
}
@Override
public int getValueCount() {
return numValues;
}
@Override
public ValueType type() {
return ValueType.BYTES_FIXED_DEREF;
}
@Override
protected int maxDoc() {
return index.size();
}
}
@Override
public ValuesEnum getEnum(AttributeSource source) throws IOException {
return new DerefBytesEnum(source, cloneData(), cloneIndex(), size);
return new DerefBytesEnum(source, cloneData(), cloneIndex(), size);
}
static class DerefBytesEnum extends ValuesEnum {
protected final IndexInput datIn;
private final PackedInts.ReaderIterator idx;
protected final long fp;
private final int size;
private final int valueCount;
private int pos = -1;
final static class DerefBytesEnum extends DerefBytesEnumBase {
public DerefBytesEnum(AttributeSource source, IndexInput datIn,
IndexInput idxIn, int size) throws IOException {
this(source, datIn, idxIn, size, ValueType.BYTES_FIXED_DEREF);
}
protected DerefBytesEnum(AttributeSource source, IndexInput datIn,
IndexInput idxIn, int size, ValueType enumType) throws IOException {
super(source, enumType);
this.datIn = datIn;
this.size = size;
idxIn.readInt();// read valueCount
idx = PackedInts.getReaderIterator(idxIn);
fp = datIn.getFilePointer();
if (size > 0) {
bytesRef.grow(this.size);
bytesRef.length = this.size;
}
bytesRef.offset = 0;
valueCount = idx.size();
}
protected void copyFrom(ValuesEnum valuesEnum) {
bytesRef = valuesEnum.bytesRef;
if (bytesRef.bytes.length < size) {
bytesRef.grow(size);
}
bytesRef.length = size;
bytesRef.offset = 0;
}
@Override
public int advance(int target) throws IOException {
if (target < valueCount) {
long address;
while ((address = idx.advance(target)) == 0) {
if (++target >= valueCount) {
return pos = NO_MORE_DOCS;
}
}
pos = idx.ord();
fill(address, bytesRef);
return pos;
}
return pos = NO_MORE_DOCS;
}
@Override
public int nextDoc() throws IOException {
if (pos >= valueCount) {
return pos = NO_MORE_DOCS;
}
return advance(pos + 1);
}
public void close() throws IOException {
try {
datIn.close();
} finally {
idx.close();
}
super(source, datIn, idxIn, size, ValueType.BYTES_FIXED_DEREF);
}
protected void fill(long address, BytesRef ref) throws IOException {
@ -284,12 +118,6 @@ class FixedDerefBytesImpl {
ref.length = size;
ref.offset = 0;
}
@Override
public int docID() {
return pos;
}
}
@Override

View File

@ -20,28 +20,17 @@ package org.apache.lucene.index.values;
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.index.values.Bytes.BytesBaseSortedSource;
import org.apache.lucene.index.values.Bytes.BytesSortedSourceBase;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
import org.apache.lucene.index.values.Bytes.DerefBytesWriterBase;
import org.apache.lucene.index.values.FixedDerefBytesImpl.Reader.DerefBytesEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
import org.apache.lucene.util.packed.PackedInts;
// Stores fixed-length byte[] by deref, ie when two docs
// have the same value, they store only 1 byte[]
@ -55,132 +44,49 @@ class FixedSortedBytesImpl {
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
static class Writer extends BytesWriterBase {
private int size = -1;
private int[] docToEntry;
static class Writer extends DerefBytesWriterBase {
private final Comparator<BytesRef> comp;
private final BytesRefHash hash;
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
Counter bytesUsed, IOContext context) throws IOException {
this(dir, id, comp, new DirectTrackingAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed),
bytesUsed, context);
}
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
Allocator allocator, Counter bytesUsed, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
ByteBlockPool pool = new ByteBlockPool(allocator);
hash = new BytesRefHash(pool, BytesRefHash.DEFAULT_CAPACITY,
new TrackingDirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY,
bytesUsed));
docToEntry = new int[1];
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
this.comp = comp;
}
@Override
public void add(int docID, BytesRef bytes) throws IOException {
if (bytes.length == 0)
return; // default - skip it
if (size == -1) {
size = bytes.length;
} else if (bytes.length != size) {
throw new IllegalArgumentException("expected bytes size=" + size
+ " but got " + bytes.length);
}
if (docID >= docToEntry.length) {
final int[] newArray = new int[ArrayUtil.oversize(1 + docID,
RamUsageEstimator.NUM_BYTES_INT)];
System.arraycopy(docToEntry, 0, newArray, 0, docToEntry.length);
bytesUsed.addAndGet((newArray.length - docToEntry.length)
* RamUsageEstimator.NUM_BYTES_INT);
docToEntry = newArray;
}
int e = hash.add(bytes);
docToEntry[docID] = 1 + (e < 0 ? (-e) - 1 : e);
}
// Important that we get docCount, in case there were
// some last docs that we didn't see
@Override
public void finish(int docCount) throws IOException {
final IndexOutput datOut = getDataOut();
boolean success = false;
public void finishInternal(int docCount) throws IOException {
final IndexOutput datOut = getOrCreateDataOut();
final int count = hash.size();
final int[] address = new int[count];
try {
datOut.writeInt(size);
if (size != -1) {
final int[] sortedEntries = hash.sort(comp);
// first dump bytes data, recording address as we go
final BytesRef bytesRef = new BytesRef(size);
for (int i = 0; i < count; i++) {
final int e = sortedEntries[i];
final BytesRef bytes = hash.get(e, bytesRef);
assert bytes.length == size;
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
address[e] = 1 + i;
}
final int[] address = new int[count+1]; // addr 0 is default values
datOut.writeInt(size);
if (size != -1) {
final int[] sortedEntries = hash.sort(comp);
// first dump bytes data, recording address as we go
final BytesRef bytesRef = new BytesRef(size);
for (int i = 0; i < count; i++) {
final int e = sortedEntries[i];
final BytesRef bytes = hash.get(e, bytesRef);
assert bytes.length == size;
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
address[e + 1] = 1 + i;
}
success = true;
} finally {
if (success) {
IOUtils.close(datOut);
} else {
IOUtils.closeWhileHandlingException(datOut);
}
hash.close();
}
final IndexOutput idxOut = getIndexOut();
success = false;
try {
idxOut.writeInt(count);
// next write index
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
PackedInts.bitsRequired(count));
final int limit;
if (docCount > docToEntry.length) {
limit = docToEntry.length;
} else {
limit = docCount;
}
for (int i = 0; i < limit; i++) {
final int e = docToEntry[i];
if (e == 0) {
// null is encoded as zero
w.add(0);
} else {
assert e > 0 && e <= count : "index must 0 > && <= " + count
+ " was: " + e;
w.add(address[e - 1]);
}
}
for (int i = limit; i < docCount; i++) {
w.add(0);
}
w.finish();
} finally {
if (success) {
IOUtils.close(idxOut);
} else {
IOUtils.closeWhileHandlingException(idxOut);
}
bytesUsed.addAndGet((-docToEntry.length)
* RamUsageEstimator.NUM_BYTES_INT);
docToEntry = null;
}
final IndexOutput idxOut = getOrCreateIndexOut();
idxOut.writeInt(count);
writeIndex(idxOut, docCount, count, address, docToEntry);
}
}
public static class Reader extends BytesReaderBase {
private final int size;
private final int numValuesStored;
public Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_START, true, context);
size = datIn.readInt();
numValuesStored = idxIn.readInt();
}
@Override
@ -192,58 +98,35 @@ class FixedSortedBytesImpl {
@Override
public SortedSource loadSorted(Comparator<BytesRef> comp)
throws IOException {
final IndexInput idxInput = cloneIndex();
final IndexInput datInput = cloneData();
datInput.seek(CodecUtil.headerLength(CODEC_NAME) + 4);
idxInput.seek(CodecUtil.headerLength(CODEC_NAME));
return new Source(datInput, idxInput, size, idxInput.readInt(), comp);
return new Source(cloneData(), cloneIndex(), size, numValuesStored, comp);
}
private static class Source extends BytesBaseSortedSource {
private final PackedInts.Reader index;
private final int numValue;
private static class Source extends BytesSortedSourceBase {
private final int valueCount;
private final int size;
public Source(IndexInput datIn, IndexInput idxIn, int size,
int numValues, Comparator<BytesRef> comp) throws IOException {
super(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), size
* numValues);
super(datIn, idxIn, comp, size * numValues, ValueType.BYTES_FIXED_SORTED);
this.size = size;
this.numValue = numValues;
index = PackedInts.getReader(idxIn);
this.valueCount = numValues;
closeIndexInput();
}
@Override
public int ord(int docID) {
return (int) index.get(docID) -1;
}
@Override
public int getByValue(BytesRef bytes, BytesRef tmpRef) {
return binarySearch(bytes, tmpRef, 0, numValue - 1);
return binarySearch(bytes, tmpRef, 0, valueCount - 1);
}
@Override
public int getValueCount() {
return numValue;
return valueCount;
}
@Override
protected BytesRef deref(int ord, BytesRef bytesRef) {
return data.fillSlice(bytesRef, (ord * size), size);
}
@Override
public ValueType type() {
return ValueType.BYTES_FIXED_SORTED;
}
@Override
protected int maxDoc() {
return index.size();
}
}
@Override

View File

@ -21,7 +21,7 @@ import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
import java.io.IOException;
import org.apache.lucene.index.values.Bytes.BytesBaseSource;
import org.apache.lucene.index.values.Bytes.BytesSourceBase;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
import org.apache.lucene.store.Directory;
@ -46,26 +46,24 @@ class FixedStraightBytesImpl {
static final String CODEC_NAME = "FixedStraightBytes";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
static class Writer extends BytesWriterBase {
private int size = -1;
static abstract class FixedBytesWriterBase extends BytesWriterBase {
protected int lastDocID = -1;
// start at -1 if the first added value is > 0
private int lastDocID = -1;
protected int size = -1;
private final int byteBlockSize = BYTE_BLOCK_SIZE;
private final ByteBlockPool pool;
private boolean merge;
private final int byteBlockSize;
private IndexOutput datOut;
public Writer(Directory dir, String id, Counter bytesUsed, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
protected FixedBytesWriterBase(Directory dir, String id, String codecName,
int version, Counter bytesUsed, IOContext context) throws IOException {
super(dir, id, codecName, version, bytesUsed, context);
pool = new ByteBlockPool(new DirectTrackingAllocator(bytesUsed));
byteBlockSize = BYTE_BLOCK_SIZE;
}
@Override
public void add(int docID, BytesRef bytes) throws IOException {
assert lastDocID < docID;
assert !merge;
if (size == -1) {
if (bytes.length > BYTE_BLOCK_SIZE) {
throw new IllegalArgumentException("bytes arrays > " + Short.MAX_VALUE + " are not supported");
@ -84,7 +82,6 @@ class FixedStraightBytesImpl {
}
private final void advancePool(int docID) {
assert !merge;
long numBytes = (docID - (lastDocID+1))*size;
while(numBytes > 0) {
if (numBytes + pool.byteUpto < byteBlockSize) {
@ -97,14 +94,50 @@ class FixedStraightBytesImpl {
}
assert numBytes == 0;
}
protected void set(BytesRef ref, int docId) {
assert BYTE_BLOCK_SIZE % size == 0 : "BYTE_BLOCK_SIZE ("+ BYTE_BLOCK_SIZE + ") must be a multiple of the size: " + size;
ref.offset = docId*size;
ref.length = size;
pool.deref(ref);
}
protected void resetPool() {
pool.dropBuffersAndReset();
}
protected void writeData(IndexOutput out) throws IOException {
pool.writePool(out);
}
protected void writeZeros(int num, IndexOutput out) throws IOException {
final byte[] zeros = new byte[size];
for (int i = 0; i < num; i++) {
out.writeBytes(zeros, zeros.length);
}
}
}
static class Writer extends FixedBytesWriterBase {
private boolean merge;
private IndexOutput datOut;
public Writer(Directory dir, String id, Counter bytesUsed, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
}
public Writer(Directory dir, String id, String codecName, int version, Counter bytesUsed, IOContext context) throws IOException {
super(dir, id, codecName, version, bytesUsed, context);
}
@Override
protected void merge(MergeState state) throws IOException {
merge = true;
datOut = getDataOut();
datOut = getOrCreateDataOut();
boolean success = false;
try {
if (state.liveDocs == null && state.reader instanceof Reader) {
if (state.liveDocs == null && state.reader instanceof Reader ) {
Reader reader = (Reader) state.reader;
final int maxDocs = reader.maxDoc;
if (maxDocs == 0) {
@ -113,7 +146,10 @@ class FixedStraightBytesImpl {
if (size == -1) {
size = reader.size;
datOut.writeInt(size);
}
} else if (size != reader.size) {
throw new IllegalArgumentException("expected bytes size=" + size
+ " but got " + reader.size);
}
if (lastDocID+1 < state.docBase) {
fill(datOut, state.docBase);
lastDocID = state.docBase-1;
@ -137,7 +173,7 @@ class FixedStraightBytesImpl {
}
}
}
@Override
protected void mergeDoc(int docID) throws IOException {
assert lastDocID < docID;
@ -158,11 +194,7 @@ class FixedStraightBytesImpl {
// Fills up to but not including this docID
private void fill(IndexOutput datOut, int docID) throws IOException {
assert size >= 0;
final long numBytes = (docID - (lastDocID+1))*size;
final byte zero = 0;
for (long i = 0; i < numBytes; i++) {
datOut.writeByte(zero);
}
writeZeros((docID - (lastDocID+1)), datOut);
}
@Override
@ -172,12 +204,12 @@ class FixedStraightBytesImpl {
if (!merge) {
// indexing path - no disk IO until here
assert datOut == null;
datOut = getDataOut();
datOut = getOrCreateDataOut();
if (size == -1) {
datOut.writeInt(0);
} else {
datOut.writeInt(size);
pool.writePool(datOut);
writeData(datOut);
}
if (lastDocID + 1 < docCount) {
fill(datOut, docCount);
@ -193,7 +225,7 @@ class FixedStraightBytesImpl {
}
success = true;
} finally {
pool.dropBuffersAndReset();
resetPool();
if (success) {
IOUtils.close(datOut);
} else {
@ -201,14 +233,19 @@ class FixedStraightBytesImpl {
}
}
}
}
public static class Reader extends BytesReaderBase {
private final int size;
private final int maxDoc;
protected final int size;
protected final int maxDoc;
Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_START, false, context);
this(dir, id, CODEC_NAME, VERSION_CURRENT, maxDoc, context);
}
protected Reader(Directory dir, String id, String codec, int version, int maxDoc, IOContext context) throws IOException {
super(dir, id, codec, version, false, context);
size = datIn.readInt();
this.maxDoc = maxDoc;
}
@ -271,13 +308,13 @@ class FixedStraightBytesImpl {
}
private static class StraightBytesSource extends BytesBaseSource {
private final static class StraightBytesSource extends BytesSourceBase {
private final int size;
private final int maxDoc;
public StraightBytesSource(IndexInput datIn, int size, int maxDoc)
throws IOException {
super(datIn, null, new PagedBytes(PAGED_BYTES_BITS), size * maxDoc);
super(datIn, null, new PagedBytes(PAGED_BYTES_BITS), size * maxDoc, ValueType.BYTES_FIXED_STRAIGHT);
this.size = size;
this.maxDoc = maxDoc;
}
@ -292,11 +329,6 @@ class FixedStraightBytesImpl {
return maxDoc;
}
@Override
public ValueType type() {
return ValueType.BYTES_FIXED_STRAIGHT;
}
@Override
protected int maxDoc() {
return maxDoc;
@ -308,66 +340,68 @@ class FixedStraightBytesImpl {
return new FixedStraightBytesEnum(source, cloneData(), size, maxDoc);
}
private static final class FixedStraightBytesEnum extends ValuesEnum {
private final IndexInput datIn;
private final int size;
private final int maxDoc;
private int pos = -1;
private final long fp;
public FixedStraightBytesEnum(AttributeSource source, IndexInput datIn,
int size, int maxDoc) throws IOException {
super(source, ValueType.BYTES_FIXED_STRAIGHT);
this.datIn = datIn;
this.size = size;
this.maxDoc = maxDoc;
bytesRef.grow(size);
bytesRef.length = size;
bytesRef.offset = 0;
fp = datIn.getFilePointer();
}
protected void copyFrom(ValuesEnum valuesEnum) {
bytesRef = valuesEnum.bytesRef;
if (bytesRef.bytes.length < size) {
bytesRef.grow(size);
}
bytesRef.length = size;
bytesRef.offset = 0;
}
public void close() throws IOException {
datIn.close();
}
@Override
public int advance(int target) throws IOException {
if (target >= maxDoc || size == 0) {
return pos = NO_MORE_DOCS;
}
if ((target - 1) != pos) // pos inc == 1
datIn.seek(fp + target * size);
datIn.readBytes(bytesRef.bytes, 0, size);
return pos = target;
}
@Override
public int docID() {
return pos;
}
@Override
public int nextDoc() throws IOException {
if (pos >= maxDoc) {
return pos = NO_MORE_DOCS;
}
return advance(pos + 1);
}
}
@Override
public ValueType type() {
return ValueType.BYTES_FIXED_STRAIGHT;
}
}
static class FixedStraightBytesEnum extends ValuesEnum {
private final IndexInput datIn;
private final int size;
private final int maxDoc;
private int pos = -1;
private final long fp;
public FixedStraightBytesEnum(AttributeSource source, IndexInput datIn,
int size, int maxDoc) throws IOException {
super(source, ValueType.BYTES_FIXED_STRAIGHT);
this.datIn = datIn;
this.size = size;
this.maxDoc = maxDoc;
bytesRef.grow(size);
bytesRef.length = size;
bytesRef.offset = 0;
fp = datIn.getFilePointer();
}
protected void copyFrom(ValuesEnum valuesEnum) {
super.copyFrom(valuesEnum);
if (bytesRef.bytes.length < size) {
bytesRef.grow(size);
}
bytesRef.length = size;
bytesRef.offset = 0;
}
public void close() throws IOException {
datIn.close();
}
@Override
public int advance(int target) throws IOException {
if (target >= maxDoc || size == 0) {
return pos = NO_MORE_DOCS;
}
if ((target - 1) != pos) // pos inc == 1
datIn.seek(fp + target * size);
datIn.readBytes(bytesRef.bytes, 0, size);
return pos = target;
}
@Override
public int docID() {
return pos;
}
@Override
public int nextDoc() throws IOException {
if (pos >= maxDoc) {
return pos = NO_MORE_DOCS;
}
return advance(pos + 1);
}
}
}

View File

@ -17,21 +17,14 @@ package org.apache.lucene.index.values;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.FloatsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Exposes {@link Writer} and reader ({@link Source}) for 32 bit and 64 bit
@ -43,11 +36,6 @@ import org.apache.lucene.util.RamUsageEstimator;
* @lucene.experimental
*/
public class Floats {
// TODO - add bulk copy where possible
private static final String CODEC_NAME = "SimpleFloats";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
private static final byte[] DEFAULTS = new byte[] {0,0,0,0,0,0,0,0};
public static Writer getWriter(Directory dir, String id, int precisionBytes,
Counter bytesUsed, IOContext context) throws IOException {
@ -55,493 +43,73 @@ public class Floats {
throw new IllegalArgumentException("precisionBytes must be 4 or 8; got "
+ precisionBytes);
}
if (precisionBytes == 4) {
return new Float4Writer(dir, id, bytesUsed, context);
} else {
return new Float8Writer(dir, id, bytesUsed, context);
}
return new FloatsWriter(dir, id, bytesUsed, context, precisionBytes);
}
public static IndexDocValues getValues(Directory dir, String id, int maxDoc, IOContext context)
throws IOException {
return new FloatsReader(dir, id, maxDoc, context);
}
abstract static class FloatsWriter extends Writer {
private final String id;
protected FloatsRef floatsRef;
protected int lastDocId = -1;
protected IndexOutput datOut;
private final byte precision;
private final Directory dir;
private final IOContext context;
protected FloatsWriter(Directory dir, String id, int precision,
Counter bytesUsed, IOContext context) throws IOException {
super(bytesUsed);
this.id = id;
this.precision = (byte) precision;
this.dir = dir;
this.context = context;
}
public long ramBytesUsed() {
return 0;
final static class FloatsWriter extends FixedStraightBytesImpl.Writer {
private final int size;
public FloatsWriter(Directory dir, String id, Counter bytesUsed,
IOContext context, int size) throws IOException {
super(dir, id, bytesUsed, context);
this.bytesRef = new BytesRef(size);
this.size = size;
bytesRef.length = size;
}
final void initDataOut() throws IOException {
assert datOut == null;
datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
Writer.DATA_EXTENSION), context);
boolean success = false;
try {
CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT);
assert datOut.getFilePointer() == CodecUtil.headerLength(CODEC_NAME);
datOut.writeByte(this.precision);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(datOut);
}
public void add(int docID, double v) throws IOException {
if (size == 8) {
bytesRef.copy(Double.doubleToRawLongBits(v));
} else {
bytesRef.copy(Float.floatToRawIntBits((float)v));
}
add(docID, bytesRef);
}
@Override
protected void mergeDoc(int docID) throws IOException {
add(docID, floatsRef.get());
}
@Override
public void add(int docID, PerDocFieldValues docValues) throws IOException {
add(docID, docValues.getFloat());
}
}
@Override
protected void setNextEnum(ValuesEnum valuesEnum) {
floatsRef = valuesEnum.getFloat();
}
protected final int fillDefault(int numValues) throws IOException {
for (int i = 0; i < numValues; i++) {
datOut.writeBytes(DEFAULTS, precision);
}
return numValues;
}
@Override
protected void merge(MergeState state) throws IOException {
if (datOut == null) {
initDataOut();
}
if (state.liveDocs == null && state.reader instanceof FloatsReader) {
// no deletes - bulk copy
final FloatsReader reader = (FloatsReader) state.reader;
assert reader.precisionBytes == (int) precision;
if (reader.maxDoc == 0)
return;
final int docBase = state.docBase;
if (docBase - lastDocId > 1) {
// fill with default values
lastDocId += fillDefault(docBase - lastDocId - 1);
}
lastDocId += reader.transferTo(datOut);
final static class FloatsReader extends FixedStraightBytesImpl.Reader {
final IndexDocValuesArray arrayTemplate;
FloatsReader(Directory dir, String id, int maxDoc, IOContext context)
throws IOException {
super(dir, id, maxDoc, context);
assert size == 4 || size == 8;
if (size == 4) {
arrayTemplate = new IndexDocValuesArray.FloatValues();
} else {
super.merge(state);
arrayTemplate = new IndexDocValuesArray.DoubleValues();
}
}
@Override
public void files(Collection<String> files) throws IOException {
files.add(IndexFileNames.segmentFileName(id, "", Writer.DATA_EXTENSION));
}
}
// Writes 4 bytes (float) per value
static final class Float4Writer extends FloatsWriter {
private int[] values;
protected Float4Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
throws IOException {
super(dir, id, 4, bytesUsed, context);
values = new int[1];
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
}
@Override
public void add(final int docID, final double v)
throws IOException {
assert docID > lastDocId : "docID: " + docID
+ " must be greater than the last added doc id: " + lastDocId;
if (docID >= values.length) {
final long len = values.length;
values = ArrayUtil.grow(values, 1 + docID);
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
* ((values.length) - len));
}
values[docID] = Float.floatToRawIntBits((float)v);
lastDocId = docID;
}
@Override
protected void mergeDoc(int docID) throws IOException {
assert datOut != null;
assert docID > lastDocId : "docID: " + docID
+ " must be greater than the last added doc id: " + lastDocId;
if (docID - lastDocId > 1) {
// fill with default values
fillDefault(docID - lastDocId - 1);
}
assert datOut != null;
datOut.writeInt(Float.floatToRawIntBits((float) floatsRef.get()));
lastDocId = docID;
}
@Override
public void finish(int docCount) throws IOException {
boolean success = false;
try {
int numDefaultsToAppend = docCount - (lastDocId + 1);
if (datOut == null) {
initDataOut();
for (int i = 0; i <= lastDocId; i++) {
datOut.writeInt(values[i]);
}
}
fillDefault(numDefaultsToAppend);
success = true;
} finally {
bytesUsed.addAndGet(-(RamUsageEstimator.NUM_BYTES_INT
* ((values.length))));
values = null;
if (success) {
IOUtils.close(datOut);
} else {
IOUtils.closeWhileHandlingException(datOut);
}
}
}
}
// Writes 8 bytes (double) per value
static final class Float8Writer extends FloatsWriter {
private long[] values;
protected Float8Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
throws IOException {
super(dir, id, 8, bytesUsed, context);
values = new long[1];
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG);
}
@Override
public void add(int docID, double v) throws IOException {
assert docID > lastDocId : "docID: " + docID
+ " must be greater than the last added doc id: " + lastDocId;
if (docID >= values.length) {
final long len = values.length;
values = ArrayUtil.grow(values, 1 + docID);
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG
* ((values.length) - len));
}
values[docID] = Double.doubleToLongBits(v);
lastDocId = docID;
}
@Override
protected void mergeDoc(int docID) throws IOException {
assert docID > lastDocId : "docID: " + docID
+ " must be greater than the last added doc id: " + lastDocId;
if (docID - lastDocId > 1) {
// fill with default values
lastDocId += fillDefault(docID - lastDocId - 1);
}
assert datOut != null;
datOut.writeLong(Double.doubleToRawLongBits((float) floatsRef.get()));
lastDocId = docID;
}
@Override
public void finish(int docCount) throws IOException {
boolean success = false;
public Source load() throws IOException {
final IndexInput indexInput = cloneData();
try {
int numDefaultsToAppend = docCount - (lastDocId + 1);
if (datOut == null) {
initDataOut();
for (int i = 0; i <= lastDocId; i++) {
datOut.writeLong(values[i]);
}
}
fillDefault(numDefaultsToAppend);
success = true;
} finally {
bytesUsed.addAndGet(-(RamUsageEstimator.NUM_BYTES_LONG
* ((values.length))));
values = null;
if (success) {
IOUtils.close(datOut);
} else {
IOUtils.closeWhileHandlingException(datOut);
}
}
}
}
/**
* Opens all necessary files, but does not read any data in until you call
* {@link #load}.
*/
static class FloatsReader extends IndexDocValues {
private final IndexInput datIn;
private final int precisionBytes;
// TODO(simonw) is ByteBuffer the way to go here?
private final int maxDoc;
protected FloatsReader(Directory dir, String id, int maxDoc, IOContext context)
throws IOException {
datIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
Writer.DATA_EXTENSION), context);
CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START);
precisionBytes = datIn.readByte();
assert precisionBytes == 4 || precisionBytes == 8;
this.maxDoc = maxDoc;
}
int transferTo(IndexOutput out) throws IOException {
IndexInput indexInput = (IndexInput) datIn.clone();
try {
indexInput.seek(CodecUtil.headerLength(CODEC_NAME));
// skip precision:
indexInput.readByte();
out.copyBytes(indexInput, precisionBytes * maxDoc);
return arrayTemplate.newFromInput(indexInput, maxDoc);
} finally {
indexInput.close();
}
return maxDoc;
}
/**
* Loads the actual values. You may call this more than once, eg if you
* already previously loaded but then discarded the Source.
*/
@Override
public Source load() throws IOException {
/* we always read BIG_ENDIAN here since the writer uses
* DataOutput#writeInt() / writeLong() we can simply read the ints / longs
* back in using readInt / readLong */
final IndexInput indexInput = (IndexInput) datIn.clone();
indexInput.seek(CodecUtil.headerLength(CODEC_NAME));
// skip precision:
indexInput.readByte();
if (precisionBytes == 4) {
final float[] values = new float[(4 * maxDoc) >> 2];
assert values.length == maxDoc;
for (int i = 0; i < values.length; i++) {
values[i] = Float.intBitsToFloat(indexInput.readInt());
}
return new Source4(values);
} else {
final double[] values = new double[(8 * maxDoc) >> 3];
assert values.length == maxDoc;
for (int i = 0; i < values.length; i++) {
values[i] = Double.longBitsToDouble(indexInput.readLong());
}
return new Source8(values);
}
}
private final class Source4 extends Source {
private final float[] values;
Source4(final float[] values ) throws IOException {
this.values = values;
}
@Override
public double getFloat(int docID) {
return values[docID];
}
@Override
public ValuesEnum getEnum(AttributeSource attrSource)
throws IOException {
return new SourceEnum(attrSource, ValueType.FLOAT_32, this, maxDoc) {
@Override
public int advance(int target) throws IOException {
if (target >= numDocs)
return pos = NO_MORE_DOCS;
floatsRef.floats[floatsRef.offset] = source.getFloat(target);
return pos = target;
}
};
}
@Override
public Object getArray() {
return this.values;
}
@Override
public boolean hasArray() {
return true;
}
@Override
public ValueType type() {
return ValueType.FLOAT_32;
}
}
private final class Source8 extends Source {
private final double[] values;
Source8(final double[] values) throws IOException {
this.values = values;
}
@Override
public double getFloat(int docID) {
return values[docID];
}
@Override
public ValuesEnum getEnum(AttributeSource attrSource)
throws IOException {
return new SourceEnum(attrSource, type(), this, maxDoc) {
@Override
public int advance(int target) throws IOException {
if (target >= numDocs)
return pos = NO_MORE_DOCS;
floatsRef.floats[floatsRef.offset] = source.getFloat(target);
return pos = target;
}
};
}
@Override
public ValueType type() {
return ValueType.FLOAT_64;
}
@Override
public Object getArray() {
return this.values;
}
@Override
public boolean hasArray() {
return true;
}
}
@Override
public void close() throws IOException {
super.close();
datIn.close();
}
@Override
public ValuesEnum getEnum(AttributeSource source) throws IOException {
IndexInput indexInput = (IndexInput) datIn.clone();
indexInput.seek(CodecUtil.headerLength(CODEC_NAME));
// skip precision:
indexInput.readByte();
return precisionBytes == 4 ? new Floats4Enum(source, indexInput, maxDoc)
: new Floats8EnumImpl(source, indexInput, maxDoc);
return arrayTemplate.getDirectEnum(source, indexInput, maxDoc);
}
@Override
public ValueType type() {
return precisionBytes == 4 ? ValueType.FLOAT_32
: ValueType.FLOAT_64;
return arrayTemplate.type();
}
}
static final class Floats4Enum extends FloatsEnumImpl {
Floats4Enum(AttributeSource source, IndexInput dataIn, int maxDoc)
throws IOException {
super(source, dataIn, 4, maxDoc, ValueType.FLOAT_32);
}
@Override
public int advance(int target) throws IOException {
if (target >= maxDoc)
return pos = NO_MORE_DOCS;
dataIn.seek(fp + (target * precision));
final int intBits = dataIn.readInt();
floatsRef.floats[0] = Float.intBitsToFloat(intBits);
floatsRef.offset = 0;
return pos = target;
}
@Override
public int docID() {
return pos;
}
@Override
public int nextDoc() throws IOException {
if (pos >= maxDoc) {
return pos = NO_MORE_DOCS;
}
return advance(pos + 1);
}
}
private static final class Floats8EnumImpl extends FloatsEnumImpl {
Floats8EnumImpl(AttributeSource source, IndexInput dataIn, int maxDoc)
throws IOException {
super(source, dataIn, 8, maxDoc, ValueType.FLOAT_64);
}
@Override
public int advance(int target) throws IOException {
if (target >= maxDoc) {
return pos = NO_MORE_DOCS;
}
dataIn.seek(fp + (target * precision));
final long value = dataIn.readLong();
floatsRef.floats[floatsRef.offset] = Double.longBitsToDouble(value);
return pos = target;
}
@Override
public int docID() {
return pos;
}
@Override
public int nextDoc() throws IOException {
if (pos >= maxDoc) {
return pos = NO_MORE_DOCS;
}
return advance(pos + 1);
}
}
static abstract class FloatsEnumImpl extends ValuesEnum {
protected final IndexInput dataIn;
protected int pos = -1;
protected final int precision;
protected final int maxDoc;
protected final long fp;
FloatsEnumImpl(AttributeSource source, IndexInput dataIn, int precision,
int maxDoc, ValueType type) throws IOException {
super(source, precision == 4 ? ValueType.FLOAT_32
: ValueType.FLOAT_64);
this.dataIn = dataIn;
this.precision = precision;
this.maxDoc = maxDoc;
fp = dataIn.getFilePointer();
floatsRef.offset = 0;
}
@Override
public void close() throws IOException {
dataIn.close();
}
}
}

View File

@ -2,14 +2,12 @@ package org.apache.lucene.index.values;
import java.io.IOException;
import org.apache.lucene.index.values.FixedStraightBytesImpl.FixedStraightBytesEnum;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
/**
@ -34,48 +32,33 @@ import org.apache.lucene.util.RamUsageEstimator;
*/
abstract class IndexDocValuesArray extends Source {
private final Counter bytesUsed;
private final int bytesPerValue;
private int size = 0;
protected final int bytesPerValue;
private final ValueType type;
private final boolean isFloat;
protected int maxDocID = -1;
IndexDocValuesArray(Counter bytesUsed, int bytesPerValue, ValueType type) {
this.bytesUsed = bytesUsed;
IndexDocValuesArray(int bytesPerValue, ValueType type) {
this.bytesPerValue = bytesPerValue;
this.type = type;
}
switch (type) {
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
isFloat = false;
break;
case FLOAT_32:
case FLOAT_64:
isFloat = true;
break;
default:
throw new IllegalStateException("illegal type: " + type);
void set(int docId, long value) {
if (docId >= size) {
adjustSize(grow(docId + 1));
}
if (docId > maxDocID) {
maxDocID = docId;
}
setInternal(docId, value);
}
protected final void adjustSize(int newSize) {
bytesUsed.addAndGet(bytesPerValue * (newSize - size));
size = newSize;
}
void clear() {
adjustSize(0);
maxDocID = -1;
size = 0;
}
protected abstract void writeDirect(IndexOutput out, long value) throws IOException;
protected abstract void writeDefaults(IndexOutput out, int num) throws IOException;
protected abstract void setInternal(int docId, long value);
protected abstract int grow(int numDocs);
abstract void write(IndexOutput output, int numDocs) throws IOException;
public abstract IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException;
@Override
public final int getValueCount() {
@ -89,21 +72,38 @@ abstract class IndexDocValuesArray extends Source {
@Override
public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
return new SourceEnum(attrSource, type(), this, maxDocID + 1) {
if (isFloat) {
return new SourceEnum(attrSource, type(), this, maxDocID + 1) {
@Override
public int advance(int target) throws IOException {
if (target >= numDocs) {
return pos = NO_MORE_DOCS;
@Override
public int advance(int target) throws IOException {
if (target >= numDocs) {
return pos = NO_MORE_DOCS;
}
floatsRef.floats[intsRef.offset] = IndexDocValuesArray.this
.getFloat(target);
return pos = target;
}
intsRef.ints[intsRef.offset] = IndexDocValuesArray.this.getInt(target);
return pos = target;
}
};
};
} else {
return new SourceEnum(attrSource, type(), this, maxDocID + 1) {
@Override
public int advance(int target) throws IOException {
if (target >= numDocs) {
return pos = NO_MORE_DOCS;
}
intsRef.ints[intsRef.offset] = IndexDocValuesArray.this
.getInt(target);
return pos = target;
}
};
}
}
abstract ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input, int maxDoc)
throws IOException;
abstract ValuesEnum getDirectEnum(AttributeSource attrSource,
IndexInput input, int maxDoc) throws IOException;
@Override
public final boolean hasArray() {
@ -111,17 +111,16 @@ abstract class IndexDocValuesArray extends Source {
}
final static class ByteValues extends IndexDocValuesArray {
private byte[] values;
private final byte[] values;
ByteValues(Counter bytesUsed) {
super(bytesUsed, 1, ValueType.FIXED_INTS_8);
ByteValues() {
super(1, ValueType.FIXED_INTS_8);
values = new byte[0];
}
ByteValues(IndexInput input, int numDocs) throws IOException {
super(Counter.newCounter(), 1, ValueType.FIXED_INTS_8);
private ByteValues(IndexInput input, int numDocs) throws IOException {
super(1, ValueType.FIXED_INTS_8);
values = new byte[numDocs];
adjustSize(numDocs);
input.readBytes(values, 0, values.length, false);
maxDocID = numDocs - 1;
}
@ -138,69 +137,37 @@ abstract class IndexDocValuesArray extends Source {
}
@Override
protected void setInternal(int docId, long value) {
values[docId] = (byte) (0xFFL & value);
}
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
int maxDoc) throws IOException {
return new FixedIntsEnum(attrSource, input, type(),
bytesPerValue, maxDoc) {
@Override
protected int grow(int numDocs) {
values = ArrayUtil.grow(values, numDocs);
return values.length;
}
@Override
void write(IndexOutput output, int numDocs) throws IOException {
assert maxDocID + 1 <= numDocs;
output.writeBytes(values, 0, maxDocID + 1);
writeDefaults(output, numDocs - (maxDocID+1));
}
@Override
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input, int maxDoc)
throws IOException {
return new FixedIntsEnumImpl(attrSource, input, type(), maxDoc) {
@Override
protected void fillNext(LongsRef ref, IndexInput dataIn)
throws IOException {
ref.ints[ref.offset] = dataIn.readByte();
protected final long toLong(BytesRef bytesRef) {
return bytesRef.bytes[bytesRef.offset];
}
};
}
@Override
void clear() {
super.clear();
values = new byte[0];
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException {
return new ByteValues(input, numDocs);
}
@Override
protected void writeDefaults(IndexOutput out, int num) throws IOException {
final byte zero = 0;
for (int i = 0; i < num; i++) {
out.writeByte(zero);
}
}
@Override
protected void writeDirect(IndexOutput out, long value) throws IOException {
out.writeByte((byte) (0xFFL & value));
}
};
final static class ShortValues extends IndexDocValuesArray {
private short[] values;
private final short[] values;
ShortValues(Counter bytesUsed) {
super(bytesUsed, RamUsageEstimator.NUM_BYTES_SHORT,
ValueType.FIXED_INTS_16);
ShortValues() {
super(RamUsageEstimator.NUM_BYTES_SHORT, ValueType.FIXED_INTS_16);
values = new short[0];
}
ShortValues(IndexInput input, int numDocs) throws IOException {
super(Counter.newCounter(), RamUsageEstimator.NUM_BYTES_SHORT,
ValueType.FIXED_INTS_16);
private ShortValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_SHORT, ValueType.FIXED_INTS_16);
values = new short[numDocs];
adjustSize(numDocs);
for (int i = 0; i < values.length; i++) {
values[i] = input.readShort();
}
@ -219,71 +186,37 @@ abstract class IndexDocValuesArray extends Source {
}
@Override
protected void setInternal(int docId, long value) {
values[docId] = (short) (0xFFFFL & value);
}
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
int maxDoc) throws IOException {
return new FixedIntsEnum(attrSource, input, type(),
bytesPerValue, maxDoc) {
@Override
protected int grow(int numDocs) {
values = ArrayUtil.grow(values, numDocs);
return values.length;
}
@Override
void write(IndexOutput output, int numDocs) throws IOException {
assert maxDocID + 1 <= numDocs;
for (int i = 0; i < maxDocID + 1; i++) {
output.writeShort(values[i]);
}
writeDefaults(output, numDocs - (maxDocID+1));
}
@Override
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input, int maxDoc)
throws IOException {
return new FixedIntsEnumImpl(attrSource, input, type(), maxDoc) {
@Override
protected void fillNext(LongsRef ref, IndexInput dataIn)
throws IOException {
ref.ints[ref.offset] = dataIn.readShort();
protected final long toLong(BytesRef bytesRef) {
return bytesRef.asShort();
}
};
}
@Override
void clear() {
super.clear();
values = new short[0];
}
@Override
protected void writeDefaults(IndexOutput out, int num) throws IOException {
final short zero = 0;
for (int i = 0; i < num; i++) {
out.writeShort(zero);
}
}
@Override
protected void writeDirect(IndexOutput out, long value) throws IOException {
out.writeShort((short) (0xFFFFL & value));
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException {
return new ShortValues(input, numDocs);
}
};
final static class IntValues extends IndexDocValuesArray {
private int[] values;
private final int[] values;
IntValues(Counter bytesUsed) {
super(bytesUsed, RamUsageEstimator.NUM_BYTES_INT, ValueType.FIXED_INTS_32);
IntValues() {
super(RamUsageEstimator.NUM_BYTES_INT, ValueType.FIXED_INTS_32);
values = new int[0];
}
IntValues(IndexInput input, int numDocs) throws IOException {
super(Counter.newCounter(), RamUsageEstimator.NUM_BYTES_INT,
ValueType.FIXED_INTS_32);
private IntValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_INT, ValueType.FIXED_INTS_32);
values = new int[numDocs];
adjustSize(numDocs);
for (int i = 0; i < values.length; i++) {
values[i] = input.readInt();
}
@ -302,71 +235,36 @@ abstract class IndexDocValuesArray extends Source {
}
@Override
protected void setInternal(int docId, long value) {
values[docId] = (int) (0xFFFFFFFF & value);
}
@Override
protected int grow(int numDocs) {
values = ArrayUtil.grow(values, numDocs);
return values.length;
}
@Override
void write(IndexOutput output, int numDocs) throws IOException {
assert maxDocID + 1 <= numDocs;
for (int i = 0; i < maxDocID + 1; i++) {
output.writeInt(values[i]);
}
writeDefaults(output, numDocs - (maxDocID+1));
}
@Override
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input, int maxDoc)
throws IOException {
return new FixedIntsEnumImpl(attrSource, input, type(), maxDoc) {
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
int maxDoc) throws IOException {
return new FixedIntsEnum(attrSource, input, type(),
bytesPerValue, maxDoc) {
@Override
protected void fillNext(LongsRef ref, IndexInput dataIn)
throws IOException {
ref.ints[ref.offset] = dataIn.readInt();
protected final long toLong(BytesRef bytesRef) {
return bytesRef.asInt();
}
};
}
@Override
void clear() {
super.clear();
values = new int[0];
}
@Override
protected void writeDefaults(IndexOutput out, int num) throws IOException {
for (int i = 0; i < num; i++) {
out.writeInt(0);
}
}
@Override
protected void writeDirect(IndexOutput out, long value) throws IOException {
out.writeInt((int) (0xFFFFFFFFL & value));
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException {
return new IntValues(input, numDocs);
}
};
final static class LongValues extends IndexDocValuesArray {
private long[] values;
private final long[] values;
LongValues(Counter bytesUsed) {
super(bytesUsed, RamUsageEstimator.NUM_BYTES_LONG,
ValueType.FIXED_INTS_64);
LongValues() {
super(RamUsageEstimator.NUM_BYTES_LONG, ValueType.FIXED_INTS_64);
values = new long[0];
}
LongValues(IndexInput input, int numDocs) throws IOException {
super(Counter.newCounter(), RamUsageEstimator.NUM_BYTES_LONG,
ValueType.FIXED_INTS_64);
private LongValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_LONG, ValueType.FIXED_INTS_64);
values = new long[numDocs];
adjustSize(numDocs);
for (int i = 0; i < values.length; i++) {
values[i] = input.readLong();
}
@ -385,122 +283,179 @@ abstract class IndexDocValuesArray extends Source {
}
@Override
protected void setInternal(int docId, long value) {
values[docId] = value;
}
@Override
protected int grow(int numDocs) {
values = ArrayUtil.grow(values, numDocs);
return values.length;
}
@Override
void write(IndexOutput output, int numDocs) throws IOException {
assert maxDocID + 1 <= numDocs;
for (int i = 0; i < maxDocID + 1; i++) {
output.writeLong(values[i]);
}
writeDefaults(output, numDocs - (maxDocID+1));
}
@Override
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input, int maxDoc)
throws IOException {
return new FixedIntsEnumImpl(attrSource, input, type(), maxDoc) {
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
int maxDoc) throws IOException {
return new FixedIntsEnum(attrSource, input, type(),
bytesPerValue, maxDoc) {
@Override
protected void fillNext(LongsRef ref, IndexInput dataIn)
throws IOException {
ref.ints[ref.offset] = dataIn.readLong();
protected final long toLong(BytesRef bytesRef) {
return bytesRef.asLong();
}
};
}
@Override
void clear() {
super.clear();
values = new long[0];
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException {
return new LongValues(input, numDocs);
}
};
final static class FloatValues extends IndexDocValuesArray {
private final float[] values;
FloatValues() {
super(RamUsageEstimator.NUM_BYTES_FLOAT, ValueType.FLOAT_32);
values = new float[0];
}
private FloatValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_FLOAT, ValueType.FLOAT_32);
values = new float[numDocs];
/* we always read BIG_ENDIAN here since the writer serialized plain bytes
* we can simply read the ints / longs
* back in using readInt / readLong */
for (int i = 0; i < values.length; i++) {
values[i] = Float.intBitsToFloat(input.readInt());
}
maxDocID = numDocs - 1;
}
@Override
protected void writeDefaults(IndexOutput out, int num) throws IOException {
for (int i = 0; i < num; i++) {
out.writeLong(0l);
}
public float[] getArray() {
return values;
}
@Override
protected void writeDirect(IndexOutput out, long value) throws IOException {
out.writeLong(value);
public double getFloat(int docID) {
assert docID >= 0 && docID < values.length;
return values[docID];
}
@Override
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
int maxDoc) throws IOException {
return new FloatsEnum(attrSource, input, type(),
bytesPerValue, maxDoc) {
@Override
protected double toDouble(BytesRef bytesRef) {
return Float.intBitsToFloat(bytesRef.asInt());
}
};
}
@Override
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException {
return new FloatValues(input, numDocs);
}
};
final static class DoubleValues extends IndexDocValuesArray {
private final double[] values;
DoubleValues() {
super(RamUsageEstimator.NUM_BYTES_DOUBLE, ValueType.FLOAT_64);
values = new double[0];
}
private DoubleValues(IndexInput input, int numDocs) throws IOException {
super(RamUsageEstimator.NUM_BYTES_DOUBLE, ValueType.FLOAT_64);
values = new double[numDocs];
/* we always read BIG_ENDIAN here since the writer serialized plain bytes
* we can simply read the ints / longs
* back in using readInt / readLong */
for (int i = 0; i < values.length; i++) {
values[i] = Double.longBitsToDouble(input.readLong());
}
maxDocID = numDocs - 1;
}
@Override
public double[] getArray() {
return values;
}
@Override
public double getFloat(int docID) {
assert docID >= 0 && docID < values.length;
return values[docID];
}
@Override
ValuesEnum getDirectEnum(AttributeSource attrSource, IndexInput input,
int maxDoc) throws IOException {
return new FloatsEnum(attrSource, input, type(),
bytesPerValue, maxDoc) {
@Override
protected double toDouble(BytesRef bytesRef) {
return Double.longBitsToDouble(bytesRef.asLong());
}
};
}
@Override
public IndexDocValuesArray newFromInput(IndexInput input, int numDocs)
throws IOException {
return new DoubleValues(input, numDocs);
}
};
private abstract static class FixedIntsEnumImpl extends ValuesEnum {
private final IndexInput dataIn;
private final int maxDoc;
private final int sizeInByte;
private int pos = -1;
private abstract static class FixedIntsEnum extends
FixedStraightBytesEnum {
private final ValueType type;
private FixedIntsEnumImpl(AttributeSource source, IndexInput dataIn,
ValueType type, int maxDoc) throws IOException {
super(source, type);
switch (type) {
case FIXED_INTS_16:
sizeInByte = 2;
break;
case FIXED_INTS_32:
sizeInByte = 4;
break;
case FIXED_INTS_64:
sizeInByte = 8;
break;
case FIXED_INTS_8:
sizeInByte = 1;
break;
default:
throw new IllegalStateException("type " + type
+ " is not a fixed int type");
}
intsRef.offset = 0;
this.dataIn = dataIn;
this.maxDoc = maxDoc;
private FixedIntsEnum(AttributeSource source, IndexInput dataIn,
ValueType type, int bytesPerValue, int maxDoc) throws IOException {
super(source, dataIn, bytesPerValue, maxDoc);
this.type = type;
}
@Override
public void close() throws IOException {
dataIn.close();
}
@Override
public int advance(int target) throws IOException {
if (target >= maxDoc) {
return pos = NO_MORE_DOCS;
final int advance = super.advance(target);
if (advance != NO_MORE_DOCS) {
intsRef.ints[0] = toLong(this.bytesRef);
}
assert target > pos;
if (target > pos + 1) {
dataIn
.seek(dataIn.getFilePointer() + ((target - pos - 1) * sizeInByte));
}
fillNext(intsRef, dataIn);
return pos = target;
return advance;
}
protected abstract void fillNext(LongsRef ref, IndexInput input)
throws IOException;
protected abstract long toLong(BytesRef bytesRef);
@Override
public int docID() {
return pos;
public ValueType type() {
return type;
}
@Override
public int nextDoc() throws IOException {
if (pos >= maxDoc) {
return pos = NO_MORE_DOCS;
}
return advance(pos + 1);
}
private abstract static class FloatsEnum extends FixedStraightBytesEnum {
private final ValueType type;
FloatsEnum(AttributeSource source, IndexInput dataIn, ValueType type, int bytePerValue, int maxDoc)
throws IOException {
super(source, dataIn, bytePerValue, maxDoc);
this.type = type;
}
@Override
public int advance(int target) throws IOException {
final int retVal = super.advance(target);
if (retVal != NO_MORE_DOCS) {
floatsRef.floats[floatsRef.offset] = toDouble(bytesRef);
}
return retVal;
}
protected abstract double toDouble(BytesRef bytesRef);
@Override
public ValueType type() {
return type;
}
}
}

View File

@ -19,28 +19,169 @@ package org.apache.lucene.index.values;
import java.io.IOException;
import org.apache.lucene.index.values.IntsImpl.IntsReader;
import org.apache.lucene.index.values.IntsImpl.IntsWriter;
import org.apache.lucene.index.values.IndexDocValuesArray.ByteValues;
import org.apache.lucene.index.values.IndexDocValuesArray.IntValues;
import org.apache.lucene.index.values.IndexDocValuesArray.LongValues;
import org.apache.lucene.index.values.IndexDocValuesArray.ShortValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
/**
* Stores ints packed and fixed with fixed-bit precision.
*
* @lucene.experimental
*/
public class Ints {
// TODO - add bulk copy where possible
public final class Ints {
private Ints() {
}
public static Writer getWriter(Directory dir, String id,
Counter bytesUsed, ValueType type, IOContext context) throws IOException {
return new IntsWriter(dir, id, bytesUsed, type, context);
public static Writer getWriter(Directory dir, String id, Counter bytesUsed,
ValueType type, IOContext context) throws IOException {
return type == ValueType.VAR_INTS ? new PackedIntValues.PackedIntsWriter(dir, id,
bytesUsed, context) : new IntsWriter(dir, id, bytesUsed, context, type);
}
public static IndexDocValues getValues(Directory dir, String id,
int numDocs, IOContext context) throws IOException {
return new IntsReader(dir, id, numDocs, context);
public static IndexDocValues getValues(Directory dir, String id, int numDocs,
ValueType type, IOContext context) throws IOException {
return type == ValueType.VAR_INTS ? new PackedIntValues.PackedIntsReader(dir, id,
numDocs, context) : new IntsReader(dir, id, numDocs, context);
}
/**
 * Writer for fixed-size integer doc values. Each value is serialized as
 * plain big-endian bytes (1, 2, 4 or 8 per value depending on the
 * {@link ValueType}) and delegated to the fixed-straight-bytes writer.
 */
static class IntsWriter extends FixedStraightBytesImpl.Writer {

  protected static final String CODEC_NAME = "Ints";
  protected static final int VERSION_START = 0;
  protected static final int VERSION_CURRENT = VERSION_START;

  // fixed int width this writer was created for; decides how add(long) packs bytes
  private final ValueType valueType;

  public IntsWriter(Directory dir, String id, Counter bytesUsed,
      IOContext context, ValueType valueType) throws IOException {
    this(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context, valueType);
  }

  protected IntsWriter(Directory dir, String id, String codecName,
      int version, Counter bytesUsed, IOContext context, ValueType valueType) throws IOException {
    super(dir, id, codecName, version, bytesUsed, context);
    this.valueType = valueType;
    // pre-size the scratch ref to exactly one value; length stays fixed so
    // every add(docID, bytesRef) writes the same number of bytes
    final int expectedSize = getSize(valueType);
    this.bytesRef = new BytesRef(expectedSize);
    bytesRef.length = expectedSize;
  }

  /** Returns the number of bytes used per value for the given fixed int type. */
  private static int getSize(ValueType type) {
    switch (type) {
    case FIXED_INTS_16:
      return 2;
    case FIXED_INTS_32:
      return 4;
    case FIXED_INTS_64:
      return 8;
    case FIXED_INTS_8:
      return 1;
    default:
      throw new IllegalStateException("illegal type " + type);
    }
  }

  /**
   * Adds a single integer value for the given doc, truncating it to the
   * configured width before handing the raw bytes to the base writer.
   */
  @Override
  public void add(int docID, long v) throws IOException {
    switch (valueType) {
    case FIXED_INTS_64:
      bytesRef.copy(v);
      break;
    case FIXED_INTS_32:
      bytesRef.copy((int) (0xFFFFFFFF & v));
      break;
    case FIXED_INTS_16:
      bytesRef.copy((short) (0xFFFFL & v));
      break;
    case FIXED_INTS_8:
      // single byte: write directly into the scratch ref
      bytesRef.bytes[0] = (byte) (0xFFL & v);
      break;
    default:
      throw new IllegalStateException("illegal type " + valueType);
    }
    add(docID, bytesRef);
  }

  @Override
  public void add(int docID, PerDocFieldValues docValues) throws IOException {
    add(docID, docValues.getInt());
  }
}
/**
 * Reader counterpart of {@link IntsWriter}: maps the per-value byte width
 * stored in the data file back to the concrete fixed int {@link ValueType}
 * and to an array template used to materialize sources and enums.
 */
final static class IntsReader extends FixedStraightBytesImpl.Reader {
  private final ValueType type;
  // stateless prototype; newFromInput/getDirectEnum create the actual instances
  private final IndexDocValuesArray arrayTemplate;

  IntsReader(Directory dir, String id, int maxDoc, IOContext context)
      throws IOException {
    super(dir, id, IntsWriter.CODEC_NAME, IntsWriter.VERSION_CURRENT, maxDoc,
        context);
    // `size` is inherited from the base reader — presumably the per-value
    // byte count read from the file header; confirm in FixedStraightBytesImpl
    switch (size) {
    case 8:
      type = ValueType.FIXED_INTS_64;
      arrayTemplate = new LongValues();
      break;
    case 4:
      type = ValueType.FIXED_INTS_32;
      arrayTemplate = new IntValues();
      break;
    case 2:
      type = ValueType.FIXED_INTS_16;
      arrayTemplate = new ShortValues();
      break;
    case 1:
      type = ValueType.FIXED_INTS_8;
      arrayTemplate = new ByteValues();
      break;
    default:
      throw new IllegalStateException("illegal size: " + size);
    }
  }

  /** Loads all values into memory; closes datIn too if loading fails. */
  @Override
  public Source load() throws IOException {
    boolean success = false;
    IndexInput input = null;
    try {
      input = cloneData();
      final Source source = arrayTemplate.newFromInput(input, maxDoc);
      success = true;
      return source;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(input, datIn);
      }
    }
  }

  /** Returns a disk-backed enum over the values; closes the clone on failure. */
  @Override
  public ValuesEnum getEnum(AttributeSource source) throws IOException {
    final IndexInput input = cloneData();
    boolean success = false;
    try {
      final ValuesEnum valuesEnum = arrayTemplate.getDirectEnum(source,
          input, maxDoc);
      success = true;
      return valuesEnum;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(input);
      }
    }
  }

  @Override
  public ValueType type() {
    return type;
  }
}
}

View File

@ -1,515 +0,0 @@
package org.apache.lucene.index.values;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
import org.apache.lucene.index.values.IndexDocValuesArray.ByteValues;
import org.apache.lucene.index.values.IndexDocValuesArray.IntValues;
import org.apache.lucene.index.values.IndexDocValuesArray.LongValues;
import org.apache.lucene.index.values.IndexDocValuesArray.ShortValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.packed.PackedInts;
/**
* Stores ints packed and fixed with fixed-bit precision.
*
* @lucene.experimental
* */
class IntsImpl {
private static final String CODEC_NAME = "Ints";
private static final byte PACKED = 0x00;
private static final byte FIXED_64 = 0x01;
private static final byte FIXED_32 = 0x02;
private static final byte FIXED_16 = 0x03;
private static final byte FIXED_8 = 0x04;
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
/**
 * Writer for int doc values. Fixed widths are buffered in an
 * {@link IndexDocValuesArray} and written as plain bytes; VAR_INTS tracks the
 * observed min/max and packs values with {@link PackedInts} when the delta
 * fits, falling back to fixed 64-bit otherwise.
 */
static class IntsWriter extends Writer {

  // set by setNextEnum(); read by mergeDoc() to pull the current merge value
  private LongsRef intsRef;
  private final IndexDocValuesArray array;
  private long minValue;
  private long maxValue;
  // false until the first add(); lets us distinguish "no values" from value 0
  private boolean started;
  private final String id;
  private int lastDocId = -1;
  private final Directory dir;
  // on-disk type tag (PACKED / FIXED_8..FIXED_64) written after the header
  private final byte typeOrd;
  private IndexOutput datOut;
  private boolean merging;
  private final IOContext context;

  protected IntsWriter(Directory dir, String id, Counter bytesUsed,
      ValueType valueType, IOContext context) throws IOException {
    super(bytesUsed);
    this.context = context;
    this.dir = dir;
    this.id = id;
    switch (valueType) {
    case FIXED_INTS_16:
      array = new ShortValues(bytesUsed);
      typeOrd = FIXED_16;
      break;
    case FIXED_INTS_32:
      array = new IntValues(bytesUsed);
      typeOrd = FIXED_32;
      break;
    case FIXED_INTS_64:
      array = new LongValues(bytesUsed);
      typeOrd = FIXED_64;
      break;
    case FIXED_INTS_8:
      array = new ByteValues(bytesUsed);
      typeOrd = FIXED_8;
      break;
    case VAR_INTS:
      // VAR_INTS buffers as longs; packing is decided at finish() time
      array = new LongValues(bytesUsed);
      typeOrd = PACKED;
      break;
    default:
      throw new IllegalStateException("unknown type " + valueType);
    }
  }

  /** Buffers one value and updates the running min/max (used for packing). */
  @Override
  public void add(int docID, long v) throws IOException {
    assert lastDocId < docID;
    if (!started) {
      started = true;
      minValue = maxValue = v;
    } else {
      if (v < minValue) {
        minValue = v;
      } else if (v > maxValue) {
        maxValue = v;
      }
    }
    lastDocId = docID;
    array.set(docID, v);
  }

  /** Lazily creates the data output and writes header + type tag; idempotent. */
  private final void initDataOut(byte typeOrd) throws IOException {
    if (datOut == null) {
      boolean success = false;
      try {
        datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
            DATA_EXTENSION), context);
        CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT);
        datOut.writeByte(typeOrd);
        success = true;
      } finally {
        if (!success) {
          IOUtils.closeWhileHandlingException(datOut);
        }
      }
    }
  }

  /** Flushes buffered values (or pads a direct merge) and closes the output. */
  @Override
  public void finish(int docCount) throws IOException {
    boolean success = false;
    try {
      if (datOut == null) {
        // if we only add or merge Packed ints datOut is not initialized
        assert !merging || typeOrd == PACKED;
        finishAdd(docCount);
      } else {
        assert datOut != null && merging && typeOrd != PACKED;
        // on merge, simply fill up missing values
        fillDefault(datOut, docCount - (lastDocId + 1));
      }
      success = true;
    } finally {
      if (success) {
        IOUtils.close(datOut);
      } else {
        IOUtils.closeWhileHandlingException(datOut);
      }
      array.clear();
    }
  }

  /** Writes the buffered array, choosing packed vs fixed-64 for VAR_INTS. */
  private final void finishAdd(int docCount) throws IOException {
    if (!started) {
      minValue = maxValue = 0;
    }
    byte headerType = typeOrd;
    if (typeOrd == PACKED) {
      final long delta = maxValue - minValue;
      // if we exceed the range of positive longs we must switch to fixed
      // ints
      if (delta <= (maxValue >= 0 && minValue <= 0 ? Long.MAX_VALUE
          : Long.MAX_VALUE - 1) && delta >= 0) {
        writePackedInts(docCount);
        return; // done
      } else {
        headerType = FIXED_64;
      }
    }
    initDataOut(headerType);
    array.write(datOut, docCount);
    assert datOut != null;
  }

  // TODO how can we improve VAR_INT merging here without violating compression?
  /**
   * Merge hook: for fixed widths with no deletions and a matching on-disk
   * type, values are bulk-copied from the segment being merged; otherwise
   * falls back to the generic per-document merge.
   */
  @Override
  protected void merge(MergeState state) throws IOException {
    merging = true;
    if (typeOrd != PACKED) {
      initDataOut(typeOrd); // init datOut since we merge directly
      if (state.liveDocs == null && state.reader instanceof IntsReader) {
        // no deleted docs - try bulk copy
        final IntsReader reader = (IntsReader) state.reader;
        if (reader.type == typeOrd) {
          final int docBase = state.docBase;
          if (docBase - lastDocId > 1) {
            // fill with default values
            lastDocId += fillDefault(datOut, docBase - lastDocId - 1);
          }
          lastDocId += reader.transferTo(datOut);
          return;
        }
      }
    }
    super.merge(state);
  }

  /** Writes the current merge value, padding any gap with defaults first. */
  @Override
  protected void mergeDoc(int docID) throws IOException {
    assert docID > lastDocId : "docID: " + docID
        + " must be greater than the last added doc id: " + lastDocId;
    assert merging;
    final long value = intsRef.get();
    if (typeOrd != PACKED) {
      // if not packed we do straight merging and write values directly
      assert datOut != null;
      if (docID - lastDocId > 1) {
        // fill with default values
        array.writeDefaults(datOut, docID - lastDocId - 1);
      }
      array.writeDirect(datOut, value);
      lastDocId = docID;
    } else {
      add(docID, value);
    }
  }

  /** Writes numValues default entries and returns numValues. */
  protected final int fillDefault(IndexOutput datOut, int numValues) throws IOException {
    array.writeDefaults(datOut, numValues);
    return numValues;
  }

  /**
   * Packs the buffered longs: writes minValue, then a sentinel default that
   * marks docs without a value (buffered 0 maps to the sentinel), then the
   * packed deltas against minValue.
   */
  private void writePackedInts(int docCount) throws IOException {
    initDataOut(PACKED);
    datOut.writeLong(minValue);
    assert array.type() == ValueType.FIXED_INTS_64;
    final long[] docToValue = (long[]) array.getArray();
    // write a default value to recognize docs without a value for that
    // field
    final long defaultValue = maxValue >= 0 && minValue <= 0 ? 0 - minValue
        : ++maxValue - minValue;
    datOut.writeLong(defaultValue);
    PackedInts.Writer w = PackedInts.getWriter(datOut, docCount,
        PackedInts.bitsRequired(maxValue - minValue));
    final int limit = docToValue.length > docCount ? docCount
        : docToValue.length;
    for (int i = 0; i < limit; i++) {
      w.add(docToValue[i] == 0 ? defaultValue : docToValue[i] - minValue);
    }
    for (int i = limit; i < docCount; i++) {
      w.add(defaultValue);
    }
    w.finish();
  }

  @Override
  protected void setNextEnum(ValuesEnum valuesEnum) {
    intsRef = valuesEnum.getInt();
  }

  @Override
  public void add(int docID, PerDocFieldValues docValues) throws IOException {
    add(docID, docValues.getInt());
  }

  @Override
  public void files(Collection<String> files) throws IOException {
    files.add(IndexFileNames.segmentFileName(id, "", DATA_EXTENSION));
  }
}
/**
* Opens all necessary files, but does not read any data in until you call
* {@link #load}.
*/
static class IntsReader extends IndexDocValues {
  private final IndexInput datIn;
  // on-disk type tag (PACKED / FIXED_8..FIXED_64) read right after the header
  private final byte type;
  private final int numDocs;

  protected IntsReader(Directory dir, String id, int numDocs, IOContext context) throws IOException {
    datIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
        Writer.DATA_EXTENSION), context);
    this.numDocs = numDocs;
    boolean success = false;
    try {
      CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START);
      type = datIn.readByte();
      success = true;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(datIn);
      }
    }
  }

  /**
   * Bulk-copies this reader's raw value bytes (header and type tag skipped)
   * to the given output and returns numDocs. Only valid for fixed widths —
   * bytesPerValue(type) throws for PACKED.
   */
  public int transferTo(IndexOutput datOut) throws IOException {
    IndexInput indexInput = (IndexInput) datIn.clone();
    boolean success = false;
    try {
      indexInput.seek(CodecUtil.headerLength(CODEC_NAME));
      // skip type
      indexInput.readByte();
      datOut.copyBytes(indexInput, bytesPerValue(type) * numDocs);
      success = true;
    } finally {
      if (success) {
        IOUtils.close(indexInput);
      } else {
        IOUtils.closeWhileHandlingException(indexInput);
      }
    }
    return numDocs;
  }

  /**
   * Loads the actual values. You may call this more than once, eg if you
   * already previously loaded but then discarded the Source.
   */
  @Override
  public Source load() throws IOException {
    boolean success = false;
    final Source source;
    IndexInput input = null;
    try {
      input = (IndexInput) datIn.clone();
      // +1 skips the type byte following the codec header
      input.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
      source = loadFixedSource(type, input, numDocs);
      success = true;
      return source;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(input, datIn);
      }
    }
  }

  @Override
  public void close() throws IOException {
    super.close();
    datIn.close();
  }

  /** Returns a disk-backed enum over the values; closes the clone on failure. */
  @Override
  public ValuesEnum getEnum(AttributeSource source) throws IOException {
    final IndexInput input = (IndexInput) datIn.clone();
    boolean success = false;
    try {
      input.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
      final ValuesEnum inst = directEnum(type, source, input, numDocs);
      success = true;
      return inst;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(input);
      }
    }
  }

  // NOTE(review): always reports VAR_INTS even for fixed-width files — the
  // concrete width is only visible via the private type tag; confirm callers
  // don't rely on the exact fixed type here
  @Override
  public ValueType type() {
    return ValueType.VAR_INTS;
  }
}
/**
 * Creates a disk-backed {@link ValuesEnum} for the given on-disk type tag.
 * The (Counter) null cast selects the bytesUsed-tracking constructor; no
 * accounting is needed for these throwaway template instances.
 */
private static ValuesEnum directEnum(byte ord, AttributeSource attrSource, IndexInput input, int numDocs) throws IOException {
  switch (ord) {
  case FIXED_16:
    return new ShortValues((Counter) null).getDirectEnum(attrSource, input, numDocs);
  case FIXED_32:
    return new IntValues((Counter) null).getDirectEnum(attrSource, input, numDocs);
  case FIXED_64:
    return new LongValues((Counter) null).getDirectEnum(attrSource, input, numDocs);
  case FIXED_8:
    return new ByteValues((Counter) null).getDirectEnum(attrSource, input, numDocs);
  case PACKED:
    return new PackedIntsEnumImpl(attrSource, input);
  default:
    throw new IllegalStateException("unknown type ordinal " + ord);
  }
}
/**
 * Materializes an in-memory {@link IndexDocValues.Source} for the given
 * on-disk type tag, reading all values from the supplied input.
 */
private static IndexDocValues.Source loadFixedSource(byte ord, IndexInput input, int numDoc) throws IOException {
  if (ord == FIXED_8) {
    return new ByteValues(input, numDoc);
  }
  if (ord == FIXED_16) {
    return new ShortValues(input, numDoc);
  }
  if (ord == FIXED_32) {
    return new IntValues(input, numDoc);
  }
  if (ord == FIXED_64) {
    return new LongValues(input, numDoc);
  }
  if (ord == PACKED) {
    return new PackedIntsSource(input);
  }
  throw new IllegalStateException("unknown type ordinal " + ord);
}
/**
 * Returns the fixed per-value byte width for a fixed-int type tag.
 * PACKED has no fixed width and is rejected like any unknown tag.
 */
private static int bytesPerValue(byte typeOrd) {
  switch (typeOrd) {
  case FIXED_8:
    return 1;
  case FIXED_16:
    return 2;
  case FIXED_32:
    return 4;
  case FIXED_64:
    return 8;
  default:
    throw new IllegalStateException("illegal type ord " + typeOrd);
  }
}
/**
 * In-memory source over packed ints. The stream layout is: minValue (long),
 * defaultValue sentinel (long), then a {@link PackedInts.Reader} of deltas
 * against minValue; the sentinel marks docs that had no value and decodes
 * to 0.
 */
static class PackedIntsSource extends Source {
  private final long minValue;
  private final long defaultValue;
  private final PackedInts.Reader values;

  public PackedIntsSource(IndexInput dataIn) throws IOException {
    minValue = dataIn.readLong();
    defaultValue = dataIn.readLong();
    values = PackedInts.getReader(dataIn);
  }

  @Override
  public long getInt(int docID) {
    // TODO -- can we somehow avoid 2X method calls
    // on each get? must push minValue down, and make
    // PackedInts implement Ints.Source
    assert docID >= 0;
    final long value = values.get(docID);
    return value == defaultValue ? 0 : minValue + value;
  }

  @Override
  public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
    return new SourceEnum(attrSource, type(), this, values.size()) {
      @Override
      public int advance(int target) throws IOException {
        if (target >= numDocs)
          return pos = NO_MORE_DOCS;
        intsRef.ints[intsRef.offset] = source.getInt(target);
        return pos = target;
      }
    };
  }

  @Override
  public ValueType type() {
    return ValueType.VAR_INTS;
  }
}
/**
 * Disk-backed enum over packed ints: reads minValue and the default-value
 * sentinel, then iterates the packed deltas, decoding the sentinel to 0 and
 * everything else to minValue + delta.
 */
private static final class PackedIntsEnumImpl extends ValuesEnum {
  private final PackedInts.ReaderIterator ints;
  private long minValue;
  private final IndexInput dataIn;
  private final long defaultValue;
  private final int maxDoc;
  // current doc, -1 before first nextDoc()/advance()
  private int pos = -1;

  private PackedIntsEnumImpl(AttributeSource source, IndexInput dataIn)
      throws IOException {
    super(source, ValueType.VAR_INTS);
    intsRef.offset = 0;
    this.dataIn = dataIn;
    minValue = dataIn.readLong();
    defaultValue = dataIn.readLong();
    this.ints = PackedInts.getReaderIterator(dataIn);
    maxDoc = ints.size();
  }

  @Override
  public void close() throws IOException {
    ints.close();
    dataIn.close();
  }

  @Override
  public int advance(int target) throws IOException {
    if (target >= maxDoc) {
      return pos = NO_MORE_DOCS;
    }
    final long val = ints.advance(target);
    // sentinel means "no value for this doc" and decodes to 0
    intsRef.ints[intsRef.offset] = val == defaultValue ? 0 : minValue + val;
    return pos = target;
  }

  @Override
  public int docID() {
    return pos;
  }

  @Override
  public int nextDoc() throws IOException {
    if (pos >= maxDoc) {
      return pos = NO_MORE_DOCS;
    }
    return advance(pos + 1);
  }
}
}

View File

@ -0,0 +1,335 @@
package org.apache.lucene.index.values;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.values.FixedStraightBytesImpl.FixedBytesWriterBase;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
import org.apache.lucene.index.values.IndexDocValuesArray.LongValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.packed.PackedInts;
/**
* Stores integers using {@link PackedInts}
*
* @lucene.experimental
* */
class PackedIntValues {
private static final String CODEC_NAME = "PackedInts";
private static final byte PACKED = 0x00;
private static final byte FIXED_64 = 0x01;
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
/**
 * Writer for VAR_INTS values. Values are buffered as 8 raw bytes each via the
 * fixed-bytes base writer while min/max are tracked; at {@link #finish(int)}
 * time the values are either packed with {@link PackedInts} (when the
 * max-min delta fits) or flushed verbatim as fixed 64-bit ints.
 */
static class PackedIntsWriter extends FixedBytesWriterBase {

  // set by setNextEnum(); read by mergeDoc() to pull the current merge value
  private LongsRef intsRef;
  private long minValue;
  private long maxValue;
  // false until the first add(); lets us distinguish "no values" from value 0
  private boolean started;
  private int lastDocId = -1;

  protected PackedIntsWriter(Directory dir, String id, Counter bytesUsed,
      IOContext context) throws IOException {
    super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
    // one long per value
    bytesRef = new BytesRef(8);
  }

  /** Buffers one value and updates the running min/max (used for packing). */
  @Override
  public void add(int docID, long v) throws IOException {
    assert lastDocId < docID;
    if (!started) {
      started = true;
      minValue = maxValue = v;
    } else {
      if (v < minValue) {
        minValue = v;
      } else if (v > maxValue) {
        maxValue = v;
      }
    }
    lastDocId = docID;
    bytesRef.copy(v);
    add(docID, bytesRef);
  }

  /**
   * Writes the buffered values to the data output, choosing the PACKED or
   * FIXED_64 representation, and closes the output.
   */
  @Override
  public void finish(int docCount) throws IOException {
    boolean success = false;
    final IndexOutput dataOut = getOrCreateDataOut();
    try {
      if (!started) {
        minValue = maxValue = 0;
      }
      final long delta = maxValue - minValue;
      // if we exceed the range of positive longs we must switch to fixed
      // ints
      if (delta <= (maxValue >= 0 && minValue <= 0 ? Long.MAX_VALUE
          : Long.MAX_VALUE - 1) && delta >= 0) {
        dataOut.writeByte(PACKED);
        writePackedInts(dataOut, docCount);
        // FIX: mark success before returning — otherwise the finally block
        // closed dataOut via closeWhileHandlingException and silently
        // swallowed any exception thrown while closing on the happy path
        success = true;
        return; // done
      } else {
        dataOut.writeByte(FIXED_64);
      }
      writeData(dataOut);
      // NOTE(review): lastDocID (inherited) and the local lastDocId should
      // track the same last-added doc via add() — confirm in FixedBytesWriterBase
      writeZeros(docCount - (lastDocID + 1), dataOut);
      success = true;
    } finally {
      resetPool();
      if (success) {
        IOUtils.close(dataOut);
      } else {
        IOUtils.closeWhileHandlingException(dataOut);
      }
    }
  }

  /** Merge hook: re-adds the current merge value through add(int, long). */
  @Override
  protected void mergeDoc(int docID) throws IOException {
    assert docID > lastDocId : "docID: " + docID
        + " must be greater than the last added doc id: " + lastDocId;
    add(docID, intsRef.get());
  }

  /**
   * Packs the buffered values: writes minValue, then a sentinel default that
   * marks docs without a value (buffered 0 maps to the sentinel), then the
   * packed deltas against minValue, padding trailing docs with the sentinel.
   */
  private void writePackedInts(IndexOutput datOut, int docCount) throws IOException {
    datOut.writeLong(minValue);
    // write a default value to recognize docs without a value for that
    // field
    final long defaultValue = maxValue >= 0 && minValue <= 0 ? 0 - minValue
        : ++maxValue - minValue;
    datOut.writeLong(defaultValue);
    PackedInts.Writer w = PackedInts.getWriter(datOut, docCount,
        PackedInts.bitsRequired(maxValue - minValue));
    for (int i = 0; i < lastDocID + 1; i++) {
      set(bytesRef, i);
      long asLong = bytesRef.asLong();
      w.add(asLong == 0 ? defaultValue : asLong - minValue);
    }
    for (int i = lastDocID + 1; i < docCount; i++) {
      w.add(defaultValue);
    }
    // FIX: finish() was called twice here, flushing the packed writer's
    // pending buffer a second time and corrupting the stream
    w.finish();
  }

  @Override
  protected void setNextEnum(ValuesEnum valuesEnum) {
    intsRef = valuesEnum.getInt();
  }

  @Override
  public void add(int docID, PerDocFieldValues docValues) throws IOException {
    add(docID, docValues.getInt());
  }
}
/**
* Opens all necessary files, but does not read any data in until you call
* {@link #load}.
*/
static class PackedIntsReader extends IndexDocValues {
  private final IndexInput datIn;
  // on-disk representation tag: PACKED or FIXED_64
  private final byte type;
  private final int numDocs;
  // non-null template only for FIXED_64 files; null means packed layout
  private final LongValues values;

  protected PackedIntsReader(Directory dir, String id, int numDocs,
      IOContext context) throws IOException {
    datIn = dir.openInput(
        IndexFileNames.segmentFileName(id, "", Writer.DATA_EXTENSION),
        context);
    this.numDocs = numDocs;
    boolean success = false;
    try {
      CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START);
      type = datIn.readByte();
      values = type == FIXED_64 ? new LongValues() : null;
      success = true;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(datIn);
      }
    }
  }

  /**
   * Loads the actual values. You may call this more than once, eg if you
   * already previously loaded but then discarded the Source.
   */
  @Override
  public Source load() throws IOException {
    boolean success = false;
    final Source source;
    IndexInput input = null;
    try {
      input = (IndexInput) datIn.clone();
      if (values == null) {
        source = new PackedIntsSource(input);
      } else {
        source = values.newFromInput(input, numDocs);
      }
      success = true;
      return source;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(input, datIn);
      }
    }
  }

  @Override
  public void close() throws IOException {
    super.close();
    datIn.close();
  }

  /** Returns a disk-backed enum matching the file's representation. */
  @Override
  public ValuesEnum getEnum(AttributeSource source) throws IOException {
    final IndexInput input = (IndexInput) datIn.clone();
    boolean success = false;
    try {
      final ValuesEnum inst;
      if (values == null) {
        inst = new PackedIntsEnumImpl(source, input);
      } else {
        inst = values.getDirectEnum(source, input, numDocs);
      }
      success = true;
      return inst;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(input);
      }
    }
  }

  @Override
  public ValueType type() {
    return ValueType.VAR_INTS;
  }
}
/**
 * In-memory source over packed ints. The stream layout is: minValue (long),
 * a defaultValue sentinel (long) marking docs without a value, then a
 * {@link PackedInts.Reader} of deltas against minValue.
 */
static class PackedIntsSource extends Source {
  private final long minValue;
  private final long defaultValue;
  private final PackedInts.Reader values;

  public PackedIntsSource(IndexInput dataIn) throws IOException {
    this.minValue = dataIn.readLong();
    this.defaultValue = dataIn.readLong();
    this.values = PackedInts.getReader(dataIn);
  }

  @Override
  public long getInt(int docID) {
    // TODO -- can we somehow avoid 2X method calls
    // on each get? must push minValue down, and make
    // PackedInts implement Ints.Source
    assert docID >= 0;
    final long raw = values.get(docID);
    if (raw == defaultValue) {
      // sentinel: this doc had no value
      return 0;
    }
    return minValue + raw;
  }

  @Override
  public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
    return new SourceEnum(attrSource, type(), this, values.size()) {
      @Override
      public int advance(int target) throws IOException {
        if (target < numDocs) {
          intsRef.ints[intsRef.offset] = source.getInt(target);
          pos = target;
        } else {
          pos = NO_MORE_DOCS;
        }
        return pos;
      }
    };
  }

  @Override
  public ValueType type() {
    return ValueType.VAR_INTS;
  }
}
/**
 * Disk-backed enum over packed ints: reads minValue and the default-value
 * sentinel, then iterates the packed deltas, decoding the sentinel to 0 and
 * everything else to minValue + delta.
 */
private static final class PackedIntsEnumImpl extends ValuesEnum {
  private final PackedInts.ReaderIterator ints;
  private long minValue;
  private final IndexInput dataIn;
  private final long defaultValue;
  private final int maxDoc;
  // current doc, -1 before first nextDoc()/advance()
  private int pos = -1;

  private PackedIntsEnumImpl(AttributeSource source, IndexInput dataIn)
      throws IOException {
    super(source, ValueType.VAR_INTS);
    intsRef.offset = 0;
    this.dataIn = dataIn;
    minValue = dataIn.readLong();
    defaultValue = dataIn.readLong();
    this.ints = PackedInts.getReaderIterator(dataIn);
    maxDoc = ints.size();
  }

  @Override
  public void close() throws IOException {
    ints.close();
    dataIn.close();
  }

  @Override
  public int advance(int target) throws IOException {
    if (target >= maxDoc) {
      return pos = NO_MORE_DOCS;
    }
    final long val = ints.advance(target);
    // sentinel means "no value for this doc" and decodes to 0
    intsRef.ints[intsRef.offset] = val == defaultValue ? 0 : minValue + val;
    return pos = target;
  }

  @Override
  public int docID() {
    return pos;
  }

  @Override
  public int nextDoc() throws IOException {
    if (pos >= maxDoc) {
      return pos = NO_MORE_DOCS;
    }
    return advance(pos + 1);
  }
}
}

View File

@ -44,9 +44,9 @@ import org.apache.lucene.util.LongsRef;
public abstract class ValuesEnum extends DocIdSetIterator {
private AttributeSource source;
private final ValueType enumType;
protected BytesRef bytesRef;
protected FloatsRef floatsRef;
protected LongsRef intsRef;
protected BytesRef bytesRef = new BytesRef(1);
protected FloatsRef floatsRef = new FloatsRef(1);
protected LongsRef intsRef = new LongsRef(1);
/**
* Creates a new {@link ValuesEnum} for the given type. The
@ -62,28 +62,6 @@ public abstract class ValuesEnum extends DocIdSetIterator {
protected ValuesEnum(AttributeSource source, ValueType enumType) {
this.source = source;
this.enumType = enumType;
switch (enumType) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
bytesRef = new BytesRef();
break;
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
case VAR_INTS:
intsRef = new LongsRef(1);
break;
case FLOAT_32:
case FLOAT_64:
floatsRef = new FloatsRef(1);
break;
}
}
/**

View File

@ -19,29 +19,17 @@ package org.apache.lucene.index.values;
import java.io.IOException;
import org.apache.lucene.index.values.Bytes.BytesBaseSource;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
import org.apache.lucene.index.values.FixedDerefBytesImpl.Reader.DerefBytesEnum;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.index.values.Bytes.DerefBytesSourceBase;
import org.apache.lucene.index.values.Bytes.DerefBytesEnumBase;
import org.apache.lucene.index.values.Bytes.DerefBytesWriterBase;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
import org.apache.lucene.util.packed.PackedInts;
// Stores variable-length byte[] by deref, ie when two docs
// have the same value, they store only 1 byte[] and both
@ -56,51 +44,6 @@ class VarDerefBytesImpl {
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
/**
 * {@link TrackingDirectBytesStartArray} that additionally maintains a
 * parallel {@code address} array (one slot per hash entry), growing and
 * clearing it in lock-step with the bytes-start array while accounting
 * its memory against the shared {@link Counter}.
 */
private static final class AddressByteStartArray extends
    TrackingDirectBytesStartArray {
  // per-entry file address; kept the same length as the start array
  int[] address;

  AddressByteStartArray(int size, Counter bytesUsed) {
    super(size, bytesUsed);
  }

  @Override
  public Counter bytesUsed() {
    return bytesUsed;
  }

  @Override
  public int[] clear() {
    // release the side array and give its bytes back to the counter
    if (address != null) {
      bytesUsed.addAndGet(-address.length * RamUsageEstimator.NUM_BYTES_INT);
      address = null;
    }
    return super.clear();
  }

  @Override
  public int[] grow() {
    assert address != null;
    final int oldSize = address.length;
    // grow the start array first, then match address to its new length
    final int[] retVal = super.grow();
    address = ArrayUtil.grow(address, retVal.length);
    bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
        * (address.length - oldSize));
    return retVal;
  }

  @Override
  public int[] init() {
    // lazily allocate on first use; account the allocation
    if (address == null) {
      address = new int[ArrayUtil.oversize(initSize,
          RamUsageEstimator.NUM_BYTES_INT)];
      bytesUsed.addAndGet((address.length) * RamUsageEstimator.NUM_BYTES_INT);
    }
    return super.init();
  }
}
/*
* TODO: if impls like this are merged we are bound to the amount of memory we
* can store into a BytesRefHash and therefore how much memory a ByteBlockPool
@ -110,170 +53,66 @@ class VarDerefBytesImpl {
* move the byte[] writing to #finish(int) and store the bytes in sorted
* order and merge them in a streamed fashion.
*/
static class Writer extends BytesWriterBase {
private int[] docToAddress;
private int address = 1;
private final AddressByteStartArray array = new AddressByteStartArray(1,
bytesUsed);
private final BytesRefHash hash;
static class Writer extends DerefBytesWriterBase {
public Writer(Directory dir, String id, Counter bytesUsed, IOContext context)
throws IOException {
this(dir, id, new DirectTrackingAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed),
bytesUsed, context);
}
public Writer(Directory dir, String id, Allocator allocator,
Counter bytesUsed, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
hash = new BytesRefHash(new ByteBlockPool(allocator), 16, array);
docToAddress = new int[1];
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
}
@Override
public void add(int docID, BytesRef bytes) throws IOException {
if (bytes.length == 0)
return; // default
final int e = hash.add(bytes);
if (docID >= docToAddress.length) {
final int oldSize = docToAddress.length;
docToAddress = ArrayUtil.grow(docToAddress, 1 + docID);
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
* (docToAddress.length - oldSize));
}
final int docAddress;
if (e >= 0) {
docAddress = array.address[e] = address;
address += bytes.length < 128 ? 1 : 2;
address += bytes.length;
} else {
docAddress = array.address[(-e) - 1];
}
docToAddress[docID] = docAddress;
}
private static int writePrefixLength(DataOutput datOut, BytesRef bytes)
throws IOException {
if (bytes.length < 128) {
datOut.writeByte((byte) bytes.length);
return 1;
} else {
datOut.writeByte((byte) (0x80 | (bytes.length >> 8)));
datOut.writeByte((byte) (bytes.length & 0xff));
return 2;
}
protected void checkSize(BytesRef bytes) {
// allow var bytes sizes
}
// Important that we get docCount, in case there were
// some last docs that we didn't see
@Override
public void finish(int docCount) throws IOException {
final IndexOutput datOut = getDataOut();
boolean success = false;
try {
final int size = hash.size();
final BytesRef bytesRef = new BytesRef();
for (int i = 0; i < size; i++) {
hash.get(i, bytesRef);
writePrefixLength(datOut, bytesRef);
datOut.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
}
success = true;
} finally {
hash.close();
if (success) {
IOUtils.close(datOut);
} else {
IOUtils.closeWhileHandlingException(datOut);
}
}
final IndexOutput idxOut = getIndexOut();
success = false;
try {
idxOut.writeInt(address - 1);
// write index
// TODO(simonw): -- allow forcing fixed array (not -1)
// TODO(simonw): check the address calculation / make it more intuitive
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
PackedInts.bitsRequired(address - 1));
final int limit;
if (docCount > docToAddress.length) {
limit = docToAddress.length;
} else {
limit = docCount;
}
for (int i = 0; i < limit; i++) {
w.add(docToAddress[i]);
}
for (int i = limit; i < docCount; i++) {
w.add(0);
}
w.finish();
success = true;
} finally {
if (success) {
IOUtils.close(idxOut);
} else {
IOUtils.closeWhileHandlingException(idxOut);
}
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
* (-docToAddress.length));
docToAddress = null;
public void finishInternal(int docCount) throws IOException {
final int size = hash.size();
final long[] addresses = new long[size+1];
final IndexOutput datOut = getOrCreateDataOut();
int addr = 1;
final BytesRef bytesRef = new BytesRef();
for (int i = 0; i < size; i++) {
hash.get(i, bytesRef);
addresses[i+1] = addr;
addr += writePrefixLength(datOut, bytesRef) + bytesRef.length;
datOut.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
}
final IndexOutput idxOut = getOrCreateIndexOut();
// write the max address to read directly on source load
idxOut.writeLong(addr - 1);
writeIndex(idxOut, docCount, addresses[size], addresses, docToEntry);
}
}
public static class Reader extends BytesReaderBase {
private final long totalBytes;
Reader(Directory dir, String id, int maxDoc, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_START, true, context);
totalBytes = idxIn.readLong();
}
@Override
public Source load() throws IOException {
final IndexInput data = cloneData();
final IndexInput index = cloneIndex();
data.seek(CodecUtil.headerLength(CODEC_NAME));
index.seek(CodecUtil.headerLength(CODEC_NAME));
final long totalBytes = index.readInt(); // should be long
return new Source(data, index, totalBytes);
return new Source(cloneData(), cloneIndex(), totalBytes);
}
private static class Source extends BytesBaseSource {
private final PackedInts.Reader index;
private final static class Source extends DerefBytesSourceBase {
public Source(IndexInput datIn, IndexInput idxIn, long totalBytes)
throws IOException {
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), totalBytes);
index = PackedInts.getReader(idxIn);
super(datIn, idxIn, totalBytes, ValueType.BYTES_VAR_DEREF);
}
@Override
public BytesRef getBytes(int docID, BytesRef bytesRef) {
long address = index.get(docID);
long address = addresses.get(docID);
bytesRef.length = 0;
return address == 0 ? bytesRef : data.fillSliceWithPrefix(bytesRef,
--address);
}
@Override
public int getValueCount() {
throw new UnsupportedOperationException();
}
@Override
public ValueType type() {
return ValueType.BYTES_VAR_DEREF;
}
@Override
protected int maxDoc() {
return index.size();
}
}
@Override
@ -281,8 +120,8 @@ class VarDerefBytesImpl {
return new VarDerefBytesEnum(source, cloneData(), cloneIndex());
}
static class VarDerefBytesEnum extends DerefBytesEnum {
final static class VarDerefBytesEnum extends DerefBytesEnumBase {
public VarDerefBytesEnum(AttributeSource source, IndexInput datIn,
IndexInput idxIn) throws IOException {
super(source, datIn, idxIn, -1, ValueType.BYTES_VAR_DEREF);
@ -299,8 +138,9 @@ class VarDerefBytesImpl {
} else {
size = ((sizeByte & 0x7f) << 8) | ((datIn.readByte() & 0xff));
}
if (ref.bytes.length < size)
if (ref.bytes.length < size) {
ref.grow(size);
}
ref.length = size;
ref.offset = 0;
datIn.readBytes(ref.bytes, 0, size);

View File

@ -18,28 +18,18 @@ package org.apache.lucene.index.values;
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.index.values.Bytes.BytesBaseSortedSource;
import org.apache.lucene.index.values.Bytes.BytesSortedSourceBase;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
import org.apache.lucene.index.values.Bytes.DerefBytesWriterBase;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
import org.apache.lucene.util.packed.PackedInts;
// Stores variable-length byte[] by deref, ie when two docs
@ -55,130 +45,66 @@ class VarSortedBytesImpl {
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
static class Writer extends BytesWriterBase {
private int[] docToEntry;
final static class Writer extends DerefBytesWriterBase {
private final Comparator<BytesRef> comp;
private final BytesRefHash hash;
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
Counter bytesUsed, IOContext context) throws IOException {
this(dir, id, comp, new DirectTrackingAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed),
bytesUsed, context);
}
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
Allocator allocator, Counter bytesUsed, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
this.hash = new BytesRefHash(new ByteBlockPool(allocator),
BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
this.comp = comp;
docToEntry = new int[1];
docToEntry[0] = -1;
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
}
@Override
public void add(int docID, BytesRef bytes) throws IOException {
if (bytes.length == 0)
return;// default
if (docID >= docToEntry.length) {
int[] newArray = new int[ArrayUtil.oversize(1 + docID,
RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
System.arraycopy(docToEntry, 0, newArray, 0, docToEntry.length);
Arrays.fill(newArray, docToEntry.length, newArray.length, -1);
bytesUsed.addAndGet((newArray.length - docToEntry.length)
* RamUsageEstimator.NUM_BYTES_INT);
docToEntry = newArray;
}
final int e = hash.add(bytes);
docToEntry[docID] = e < 0 ? (-e) - 1 : e;
protected void checkSize(BytesRef bytes) {
// allow var bytes sizes
}
// Important that we get docCount, in case there were
// some last docs that we didn't see
@Override
public void finish(int docCount) throws IOException {
public void finishInternal(int docCount) throws IOException {
final int count = hash.size();
final IndexOutput datOut = getDataOut();
final IndexOutput datOut = getOrCreateDataOut();
long offset = 0;
long lastOffset = 0;
final int[] index = new int[count];
final int[] index = new int[count+1];
final long[] offsets = new long[count];
boolean success = false;
try {
final int[] sortedEntries = hash.sort(comp);
// first dump bytes data, recording index & offset as
// we go
for (int i = 0; i < count; i++) {
final int e = sortedEntries[i];
offsets[i] = offset;
index[e] = 1 + i;
final int[] sortedEntries = hash.sort(comp);
// first dump bytes data, recording index & offset as
// we go
for (int i = 0; i < count; i++) {
final int e = sortedEntries[i];
offsets[i] = offset;
index[e+1] = 1 + i;
final BytesRef bytes = hash.get(e, new BytesRef());
// TODO: we could prefix code...
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
lastOffset = offset;
offset += bytes.length;
}
success = true;
} finally {
if (success) {
IOUtils.close(datOut);
} else {
IOUtils.closeWhileHandlingException(datOut);
}
hash.close();
final BytesRef bytes = hash.get(e, new BytesRef());
// TODO: we could prefix code...
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
lastOffset = offset;
offset += bytes.length;
}
final IndexOutput idxOut = getIndexOut();
success = false;
try {
// total bytes of data
idxOut.writeLong(offset);
// write index -- first doc -> 1+ord
// TODO(simonw): allow not -1:
final PackedInts.Writer indexWriter = PackedInts.getWriter(idxOut,
docCount, PackedInts.bitsRequired(count));
final int limit = docCount > docToEntry.length ? docToEntry.length
: docCount;
for (int i = 0; i < limit; i++) {
final int e = docToEntry[i];
indexWriter.add(e == -1 ? 0 : index[e]);
}
for (int i = limit; i < docCount; i++) {
indexWriter.add(0);
}
indexWriter.finish();
// next ord (0-based) -> offset
// TODO(simonw): -- allow not -1:
PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count,
PackedInts.bitsRequired(lastOffset));
for (int i = 0; i < count; i++) {
offsetWriter.add(offsets[i]);
}
offsetWriter.finish();
success = true;
} finally {
bytesUsed.addAndGet((-docToEntry.length)
* RamUsageEstimator.NUM_BYTES_INT);
docToEntry = null;
if (success) {
IOUtils.close(idxOut);
} else {
IOUtils.closeWhileHandlingException(idxOut);
}
final IndexOutput idxOut = getOrCreateIndexOut();
// total bytes of data
idxOut.writeLong(offset);
// write index -- first doc -> 1+ord
writeIndex(idxOut, docCount, count, index, docToEntry);
// next ord (0-based) -> offset
PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count,
PackedInts.bitsRequired(lastOffset));
for (int i = 0; i < count; i++) {
offsetWriter.add(offsets[i]);
}
offsetWriter.finish();
}
}
public static class Reader extends BytesReaderBase {
private final Comparator<BytesRef> defaultComp;
Reader(Directory dir, String id, int maxDoc, Comparator<BytesRef> comparator, IOContext context) throws IOException {
Reader(Directory dir, String id, int maxDoc,
Comparator<BytesRef> comparator, IOContext context) throws IOException {
super(dir, id, CODEC_NAME, VERSION_START, true, context);
this.defaultComp = comparator;
}
@ -196,32 +122,25 @@ class VarSortedBytesImpl {
return new Source(cloneData(), indexIn, comp, indexIn.readLong());
}
private static class Source extends BytesBaseSortedSource {
private final PackedInts.Reader docToOrdIndex;
private static class Source extends BytesSortedSourceBase {
private final PackedInts.Reader ordToOffsetIndex; // 0-based
private final long totBytes;
private final int valueCount;
public Source(IndexInput datIn, IndexInput idxIn,
Comparator<BytesRef> comp, long dataLength) throws IOException {
super(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), dataLength);
super(datIn, idxIn, comp, dataLength, ValueType.BYTES_VAR_SORTED);
totBytes = dataLength;
docToOrdIndex = PackedInts.getReader(idxIn);
ordToOffsetIndex = PackedInts.getReader(idxIn);
valueCount = ordToOffsetIndex.size();
closeIndexInput();
}
@Override
public int ord(int docID) {
return (int) docToOrdIndex.get(docID) - 1;
}
@Override
public int getByValue(BytesRef bytes, BytesRef tmpRef) {
return binarySearch(bytes, tmpRef, 0, valueCount - 1);
}
@Override
public int getValueCount() {
return valueCount;
@ -240,16 +159,6 @@ class VarSortedBytesImpl {
data.fillSlice(bytesRef, offset, (int) (nextOffset - offset));
return bytesRef;
}
@Override
public ValueType type() {
return ValueType.BYTES_VAR_SORTED;
}
@Override
protected int maxDoc() {
return docToOrdIndex.size();
}
}
@Override

View File

@ -19,9 +19,9 @@ package org.apache.lucene.index.values;
import java.io.IOException;
import org.apache.lucene.index.values.Bytes.BytesBaseSource;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
import org.apache.lucene.index.values.Bytes.DerefBytesSourceBase;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@ -32,7 +32,6 @@ import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
import org.apache.lucene.util.packed.PackedInts;
@ -95,7 +94,7 @@ class VarStraightBytesImpl {
@Override
protected void merge(MergeState state) throws IOException {
merge = true;
datOut = getDataOut();
datOut = getOrCreateDataOut();
boolean success = false;
try {
if (state.liveDocs == null && state.reader instanceof Reader) {
@ -166,7 +165,7 @@ class VarStraightBytesImpl {
public void finish(int docCount) throws IOException {
boolean success = false;
assert (!merge && datOut == null) || (merge && datOut != null);
final IndexOutput datOut = getDataOut();
final IndexOutput datOut = getOrCreateDataOut();
try {
if (!merge) {
// header is already written in getDataOut()
@ -183,7 +182,7 @@ class VarStraightBytesImpl {
}
success = false;
final IndexOutput idxOut = getIndexOut();
final IndexOutput idxOut = getOrCreateIndexOut();
try {
if (lastDocID == -1) {
idxOut.writeVLong(0);
@ -234,12 +233,10 @@ class VarStraightBytesImpl {
return new Source(cloneData(), cloneIndex());
}
private class Source extends BytesBaseSource {
private final PackedInts.Reader addresses;
private class Source extends DerefBytesSourceBase {
public Source(IndexInput datIn, IndexInput idxIn) throws IOException {
super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), idxIn.readVLong());
addresses = PackedInts.getReader(idxIn);
super(datIn, idxIn, idxIn.readVLong(), ValueType.BYTES_VAR_STRAIGHT);
}
@Override
@ -263,21 +260,6 @@ class VarStraightBytesImpl {
}
};
}
@Override
public int getValueCount() {
throw new UnsupportedOperationException();
}
@Override
public ValueType type() {
return ValueType.BYTES_VAR_STRAIGHT;
}
@Override
protected int maxDoc() {
return addresses.size();
}
}
@Override

View File

@ -243,6 +243,18 @@ public final class ByteBlockPool {
assert term.length >= 0;
return term;
}
/**
 * Dereferences the byte block according to the {@link BytesRef} offset. The
 * offset is interpreted as the absolute offset into this
 * {@link ByteBlockPool}: the ref is rewritten in place to point at the
 * buffer containing that position, with a buffer-relative offset.
 * <p>
 * NOTE: the ref's length is left untouched; the caller is responsible for
 * it spanning a single block.
 */
public final BytesRef deref(BytesRef bytes) {
  final int offset = bytes.offset;
  // high bits select the buffer, low bits the position within it
  byte[] buffer = buffers[offset >> BYTE_BLOCK_SHIFT];
  int pos = offset & BYTE_BLOCK_MASK;
  bytes.bytes = buffer;
  bytes.offset = pos;
  return bytes;
}
/**
* Copies the given {@link BytesRef} at the current positions (

View File

@ -238,6 +238,12 @@ public final class BytesRef implements Comparable<BytesRef> {
return sb.toString();
}
/**
* Copies the given {@link BytesRef}
* <p>
* NOTE: this method resets the offset to 0 and resizes the reference array
* if needed.
*/
public void copy(BytesRef other) {
if (bytes.length < other.length) {
bytes = new byte[other.length];
@ -247,6 +253,93 @@ public final class BytesRef implements Comparable<BytesRef> {
offset = 0;
}
/**
 * Copies the given long value and encodes it as 8 byte Big-Endian.
 * <p>
 * NOTE: this method resets the offset to 0, length to 8 and resizes the
 * reference array if needed.
 */
public void copy(long value) {
  if (bytes.length < 8) {
    bytes = new byte[8];
  }
  // high 32 bits first (big-endian); the assignment resets offset to 0
  copyInternal((int) (value >> 32), offset = 0);
  copyInternal((int) value, 4);
  length = 8;
}
/**
 * Copies the given int value and encodes it as 4 byte Big-Endian.
 * <p>
 * NOTE: this method resets the offset to 0, length to 4 and resizes the
 * reference array if needed.
 */
public void copy(int value) {
  if (bytes.length < 4) {
    bytes = new byte[4];
  }
  // the assignment resets offset to 0 before writing
  copyInternal(value, offset = 0);
  length = 4;
}
/**
 * Copies the given short value and encodes it as a 2 byte Big-Endian.
 * <p>
 * NOTE: this method resets the offset to 0, length to 2 and resizes the
 * reference array if needed.
 */
public void copy(short value) {
  if (bytes.length < 2) {
    bytes = new byte[2];
  }
  // reset offset as documented and as copy(int)/copy(long) do; without
  // this, a pre-existing offset could index past a freshly sized array
  offset = 0;
  bytes[offset] = (byte) (value >> 8);
  bytes[offset + 1] = (byte) (value);
  // was missing: asShort() and consumers rely on length being set
  length = 2;
}
/**
 * Converts 2 consecutive bytes from the current offset to a short. Bytes
 * are interpreted as Big-Endian (most significant byte first).
 * <p>
 * NOTE: this method does <b>NOT</b> check the bounds of the referenced array.
 */
public short asShort() {
  int pos = offset;
  // combine high and low byte; mask keeps the sign-extended bytes in range
  return (short) (0xFFFF & ((bytes[pos++] & 0xFF) << 8) | (bytes[pos] & 0xFF));
}
/**
 * Converts 4 consecutive bytes from the current offset to an int. Bytes
 * are interpreted as Big-Endian (most significant byte first).
 * <p>
 * NOTE: this method does <b>NOT</b> check the bounds of the referenced array.
 */
public int asInt() {
  return asIntInternal(offset);
}
/**
 * Converts 8 consecutive bytes from the current offset to a long. Bytes
 * are interpreted as Big-Endian (most significant byte first).
 * <p>
 * NOTE: this method does <b>NOT</b> check the bounds of the referenced array.
 */
public long asLong() {
  // high int shifted up, low int masked to avoid sign extension
  return (((long) asIntInternal(offset) << 32) | asIntInternal(offset + 4) & 0xFFFFFFFFL);
}
// Writes the int as 4 big-endian bytes starting at startOffset.
// No bounds checking; callers guarantee capacity.
private void copyInternal(int value, int startOffset) {
  bytes[startOffset] = (byte) (value >> 24);
  bytes[startOffset + 1] = (byte) (value >> 16);
  bytes[startOffset + 2] = (byte) (value >> 8);
  bytes[startOffset + 3] = (byte) (value);
}
// Reads 4 big-endian bytes starting at pos as an int.
// Masking with 0xFF prevents sign extension of the individual bytes.
private int asIntInternal(int pos) {
  return ((bytes[pos++] & 0xFF) << 24) | ((bytes[pos++] & 0xFF) << 16)
      | ((bytes[pos++] & 0xFF) << 8) | (bytes[pos] & 0xFF);
}
public void append(BytesRef other) {
int newLen = length + other.length;
if (bytes.length < newLen) {
@ -284,7 +377,7 @@ public final class BytesRef implements Comparable<BytesRef> {
// One is a prefix of the other, or, they are equal:
return this.length - other.length;
}
private final static Comparator<BytesRef> utf8SortedAsUnicodeSortOrder = new UTF8SortedAsUnicodeComparator();
public static Comparator<BytesRef> getUTF8SortedAsUnicodeComparator() {

View File

@ -188,7 +188,7 @@ public class TestDocValues extends LuceneTestCase {
w.add(1, minMax[i][1]);
w.finish(2);
assertEquals(0, trackBytes.get());
IndexDocValues r = Ints.getValues(dir, "test", 2, newIOContext(random));
IndexDocValues r = Ints.getValues(dir, "test", 2, ValueType.VAR_INTS, newIOContext(random));
Source source = getSource(r);
assertEquals(i + " with min: " + minMax[i][0] + " max: " + minMax[i][1],
expectedTypes[i], source.type());
@ -229,7 +229,7 @@ public class TestDocValues extends LuceneTestCase {
w.add(i, (long) sourceArray[i]);
}
w.finish(sourceArray.length);
IndexDocValues r = Ints.getValues(dir, "test", sourceArray.length, newIOContext(random));
IndexDocValues r = Ints.getValues(dir, "test", sourceArray.length, ValueType.FIXED_INTS_8, newIOContext(random));
Source source = r.getSource();
assertTrue(source.hasArray());
byte[] loaded = ((byte[])source.getArray());
@ -250,7 +250,7 @@ public class TestDocValues extends LuceneTestCase {
w.add(i, (long) sourceArray[i]);
}
w.finish(sourceArray.length);
IndexDocValues r = Ints.getValues(dir, "test", sourceArray.length, newIOContext(random));
IndexDocValues r = Ints.getValues(dir, "test", sourceArray.length, ValueType.FIXED_INTS_16, newIOContext(random));
Source source = r.getSource();
assertTrue(source.hasArray());
short[] loaded = ((short[])source.getArray());
@ -271,7 +271,7 @@ public class TestDocValues extends LuceneTestCase {
w.add(i, sourceArray[i]);
}
w.finish(sourceArray.length);
IndexDocValues r = Ints.getValues(dir, "test", sourceArray.length, newIOContext(random));
IndexDocValues r = Ints.getValues(dir, "test", sourceArray.length, ValueType.FIXED_INTS_64, newIOContext(random));
Source source = r.getSource();
assertTrue(source.hasArray());
long[] loaded = ((long[])source.getArray());
@ -292,7 +292,7 @@ public class TestDocValues extends LuceneTestCase {
w.add(i, (long) sourceArray[i]);
}
w.finish(sourceArray.length);
IndexDocValues r = Ints.getValues(dir, "test", sourceArray.length, newIOContext(random));
IndexDocValues r = Ints.getValues(dir, "test", sourceArray.length, ValueType.FIXED_INTS_32, newIOContext(random));
Source source = r.getSource();
assertTrue(source.hasArray());
int[] loaded = ((int[])source.getArray());
@ -363,7 +363,7 @@ public class TestDocValues extends LuceneTestCase {
w.finish(NUM_VALUES + additionalDocs);
assertEquals(0, trackBytes.get());
IndexDocValues r = Ints.getValues(dir, "test", NUM_VALUES + additionalDocs, newIOContext(random));
IndexDocValues r = Ints.getValues(dir, "test", NUM_VALUES + additionalDocs, type, newIOContext(random));
for (int iter = 0; iter < 2; iter++) {
Source s = getSource(r);
assertEquals(type, s.type());