LUCENE-3216: keep float doc values in memory during indexing while merge directly to the target file

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1140044 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2011-06-27 08:07:15 +00:00
parent b15ca9a6b9
commit 28c15b9637
4 changed files with 101 additions and 50 deletions

View File

@ -387,7 +387,7 @@ public final class Bytes {
}
@Override
protected void add(int docID) throws IOException {
protected void mergeDoc(int docID) throws IOException {
add(docID, bytesRef);
}

View File

@ -25,10 +25,12 @@ import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.FloatsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Exposes {@link Writer} and reader ({@link Source}) for 32 bit and 64 bit
@ -44,11 +46,7 @@ public class Floats {
private static final String CODEC_NAME = "SimpleFloats";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
private static final int INT_DEFAULT = Float
.floatToRawIntBits(0.0f);
private static final long LONG_DEFAULT = Double
.doubleToRawLongBits(0.0d);
private static final byte[] DEFAULTS = new byte[] {0,0,0,0,0,0,0,0};
public static Writer getWriter(Directory dir, String id, int precisionBytes,
AtomicLong bytesUsed) throws IOException {
@ -70,16 +68,27 @@ public class Floats {
abstract static class FloatsWriter extends Writer {
private final String id;
private FloatsRef floatsRef;
protected FloatsRef floatsRef;
protected int lastDocId = -1;
protected IndexOutput datOut;
private final byte precision;
private final Directory dir;
protected FloatsWriter(Directory dir, String id, int precision,
AtomicLong bytesUsed) throws IOException {
super(bytesUsed);
this.id = id;
this.precision = (byte) precision;
this.dir = dir;
}
public long ramBytesUsed() {
return 0;
}
final void initDataOut() throws IOException {
assert datOut == null;
datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
Writer.DATA_EXTENSION));
boolean success = false;
@ -95,12 +104,8 @@ public class Floats {
}
}
public long ramBytesUsed() {
return 0;
}
@Override
protected void add(int docID) throws IOException {
protected void mergeDoc(int docID) throws IOException {
add(docID, floatsRef.get());
}
@ -114,13 +119,20 @@ public class Floats {
floatsRef = valuesEnum.getFloat();
}
protected abstract int fillDefault(int num) throws IOException;
protected final int fillDefault(int numValues) throws IOException {
for (int i = 0; i < numValues; i++) {
datOut.writeBytes(DEFAULTS, precision);
}
return numValues;
}
@Override
protected void merge(MergeState state) throws IOException {
if (datOut == null) {
initDataOut();
}
if (state.bits == null && state.reader instanceof FloatsReader) {
// no deletes - bulk copy
// TODO: should be do bulks with deletes too?
final FloatsReader reader = (FloatsReader) state.reader;
assert reader.precisionBytes == (int) precision;
if (reader.maxDoc == 0)
@ -131,8 +143,10 @@ public class Floats {
lastDocId += fillDefault(docBase - lastDocId - 1);
}
lastDocId += reader.transferTo(datOut);
} else
} else {
super.merge(state);
}
}
@Override
@ -143,10 +157,12 @@ public class Floats {
// Writes 4 bytes (float) per value
static class Float4Writer extends FloatsWriter {
private int[] values;
protected Float4Writer(Directory dir, String id, AtomicLong bytesUsed)
throws IOException {
super(dir, id, 4, bytesUsed);
values = new int[1];
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
}
@Override
@ -154,76 +170,111 @@ public class Floats {
throws IOException {
assert docID > lastDocId : "docID: " + docID
+ " must be greater than the last added doc id: " + lastDocId;
if (docID >= values.length) {
final long len = values.length;
values = ArrayUtil.grow(values, 1 + docID);
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
* ((values.length) - len));
}
values[docID] = Float.floatToRawIntBits((float)v);
lastDocId = docID;
}
@Override
protected void mergeDoc(int docID) throws IOException {
assert datOut != null;
assert docID > lastDocId : "docID: " + docID
+ " must be greater than the last added doc id: " + lastDocId;
if (docID - lastDocId > 1) {
// fill with default values
lastDocId += fillDefault(docID - lastDocId - 1);
fillDefault(docID - lastDocId - 1);
}
assert datOut != null;
datOut.writeInt(Float.floatToRawIntBits((float) v));
++lastDocId;
datOut.writeInt(Float.floatToRawIntBits((float) floatsRef.get()));
lastDocId = docID;
}
@Override
public void finish(int docCount) throws IOException {
boolean success = false;
try {
if (docCount > lastDocId + 1)
for (int i = lastDocId; i < docCount; i++) {
datOut.writeInt(INT_DEFAULT); // default value
int numDefaultsToAppend = docCount - (lastDocId + 1);
if (datOut == null) {
initDataOut();
for (int i = 0; i <= lastDocId; i++) {
datOut.writeInt(values[i]);
}
}
fillDefault(numDefaultsToAppend);
success = true;
} finally {
datOut.close();
bytesUsed.addAndGet(-(RamUsageEstimator.NUM_BYTES_INT
* ((values.length))));
values = null;
IOUtils.closeSafely(!success, datOut);
}
}
@Override
protected int fillDefault(int numValues) throws IOException {
for (int i = 0; i < numValues; i++) {
datOut.writeInt(INT_DEFAULT);
}
return numValues;
}
}
// Writes 8 bytes (double) per value
static class Float8Writer extends FloatsWriter {
private long[] values;
protected Float8Writer(Directory dir, String id, AtomicLong bytesUsed)
throws IOException {
super(dir, id, 8, bytesUsed);
values = new long[1];
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG);
}
@Override
public void add(int docID, double v) throws IOException {
assert docID > lastDocId : "docID: " + docID
+ " must be greater than the last added doc id: " + lastDocId;
if (docID >= values.length) {
final long len = values.length;
values = ArrayUtil.grow(values, 1 + docID);
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG
* ((values.length) - len));
}
values[docID] = Double.doubleToLongBits(v);
lastDocId = docID;
}
@Override
protected void mergeDoc(int docID) throws IOException {
assert docID > lastDocId : "docID: " + docID
+ " must be greater than the last added doc id: " + lastDocId;
if (docID - lastDocId > 1) {
// fill with default values
lastDocId += fillDefault(docID - lastDocId - 1);
}
assert datOut != null;
datOut.writeLong(Double.doubleToRawLongBits(v));
++lastDocId;
datOut.writeLong(Double.doubleToRawLongBits((float) floatsRef.get()));
lastDocId = docID;
}
@Override
public void finish(int docCount) throws IOException {
boolean success = false;
try {
if (docCount > lastDocId + 1)
for (int i = lastDocId; i < docCount; i++) {
datOut.writeLong(LONG_DEFAULT); // default value
int numDefaultsToAppend = docCount - (lastDocId + 1);
if (datOut == null) {
initDataOut();
for (int i = 0; i <= lastDocId; i++) {
datOut.writeLong(values[i]);
}
}
fillDefault(numDefaultsToAppend);
success = true;
} finally {
datOut.close();
bytesUsed.addAndGet(-(RamUsageEstimator.NUM_BYTES_LONG
* ((values.length))));
values = null;
IOUtils.closeSafely(!success, datOut);
}
}
@Override
protected int fillDefault(int numValues) throws IOException {
for (int i = 0; i < numValues; i++) {
datOut.writeLong(LONG_DEFAULT);
}
return numValues;
}
}
/**

View File

@ -157,7 +157,7 @@ class IntsImpl {
}
@Override
protected void add(int docID) throws IOException {
protected void mergeDoc(int docID) throws IOException {
add(docID, intsRef.get());
}

View File

@ -113,11 +113,11 @@ public abstract class Writer extends DocValuesConsumer {
* the {@link Writer} implementation. The given document ID must always be
* greater than the previous ID or <tt>0</tt> if called the first time.
*/
protected abstract void add(int docID) throws IOException;
protected abstract void mergeDoc(int docID) throws IOException;
/**
* Sets the next {@link ValuesEnum} to consume values from on calls to
* {@link #add(int)}
* {@link #mergeDoc(int)}
*
* @param valuesEnum
* the next {@link ValuesEnum}, this must not be null
@ -159,7 +159,7 @@ public abstract class Writer extends DocValuesConsumer {
}
}
if (currentDocId == i) { // we are on the doc to merge
add(docID);
mergeDoc(docID);
}
++docID;
}