LUCENE-3216: keep float doc values in memory during indexing, while merges write directly to the target file

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1140044 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2011-06-27 08:07:15 +00:00
parent b15ca9a6b9
commit 28c15b9637
4 changed files with 101 additions and 50 deletions

View File

@ -387,7 +387,7 @@ public final class Bytes {
} }
@Override @Override
protected void add(int docID) throws IOException { protected void mergeDoc(int docID) throws IOException {
add(docID, bytesRef); add(docID, bytesRef);
} }

View File

@ -25,10 +25,12 @@ import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.CodecUtil; import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.FloatsRef; import org.apache.lucene.util.FloatsRef;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
/** /**
* Exposes {@link Writer} and reader ({@link Source}) for 32 bit and 64 bit * Exposes {@link Writer} and reader ({@link Source}) for 32 bit and 64 bit
@ -44,11 +46,7 @@ public class Floats {
private static final String CODEC_NAME = "SimpleFloats"; private static final String CODEC_NAME = "SimpleFloats";
static final int VERSION_START = 0; static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START; static final int VERSION_CURRENT = VERSION_START;
private static final int INT_DEFAULT = Float private static final byte[] DEFAULTS = new byte[] {0,0,0,0,0,0,0,0};
.floatToRawIntBits(0.0f);
private static final long LONG_DEFAULT = Double
.doubleToRawLongBits(0.0d);
public static Writer getWriter(Directory dir, String id, int precisionBytes, public static Writer getWriter(Directory dir, String id, int precisionBytes,
AtomicLong bytesUsed) throws IOException { AtomicLong bytesUsed) throws IOException {
@ -70,16 +68,27 @@ public class Floats {
abstract static class FloatsWriter extends Writer { abstract static class FloatsWriter extends Writer {
private final String id; private final String id;
private FloatsRef floatsRef; protected FloatsRef floatsRef;
protected int lastDocId = -1; protected int lastDocId = -1;
protected IndexOutput datOut; protected IndexOutput datOut;
private final byte precision; private final byte precision;
private final Directory dir;
protected FloatsWriter(Directory dir, String id, int precision, protected FloatsWriter(Directory dir, String id, int precision,
AtomicLong bytesUsed) throws IOException { AtomicLong bytesUsed) throws IOException {
super(bytesUsed); super(bytesUsed);
this.id = id; this.id = id;
this.precision = (byte) precision; this.precision = (byte) precision;
this.dir = dir;
}
public long ramBytesUsed() {
return 0;
}
final void initDataOut() throws IOException {
assert datOut == null;
datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "", datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
Writer.DATA_EXTENSION)); Writer.DATA_EXTENSION));
boolean success = false; boolean success = false;
@ -95,12 +104,8 @@ public class Floats {
} }
} }
public long ramBytesUsed() {
return 0;
}
@Override @Override
protected void add(int docID) throws IOException { protected void mergeDoc(int docID) throws IOException {
add(docID, floatsRef.get()); add(docID, floatsRef.get());
} }
@ -114,13 +119,20 @@ public class Floats {
floatsRef = valuesEnum.getFloat(); floatsRef = valuesEnum.getFloat();
} }
protected abstract int fillDefault(int num) throws IOException; protected final int fillDefault(int numValues) throws IOException {
for (int i = 0; i < numValues; i++) {
datOut.writeBytes(DEFAULTS, precision);
}
return numValues;
}
@Override @Override
protected void merge(MergeState state) throws IOException { protected void merge(MergeState state) throws IOException {
if (datOut == null) {
initDataOut();
}
if (state.bits == null && state.reader instanceof FloatsReader) { if (state.bits == null && state.reader instanceof FloatsReader) {
// no deletes - bulk copy // no deletes - bulk copy
// TODO: should we do bulk copies with deletes too?
final FloatsReader reader = (FloatsReader) state.reader; final FloatsReader reader = (FloatsReader) state.reader;
assert reader.precisionBytes == (int) precision; assert reader.precisionBytes == (int) precision;
if (reader.maxDoc == 0) if (reader.maxDoc == 0)
@ -131,8 +143,10 @@ public class Floats {
lastDocId += fillDefault(docBase - lastDocId - 1); lastDocId += fillDefault(docBase - lastDocId - 1);
} }
lastDocId += reader.transferTo(datOut); lastDocId += reader.transferTo(datOut);
} else } else {
super.merge(state); super.merge(state);
}
} }
@Override @Override
@ -143,10 +157,12 @@ public class Floats {
// Writes 4 bytes (float) per value // Writes 4 bytes (float) per value
static class Float4Writer extends FloatsWriter { static class Float4Writer extends FloatsWriter {
private int[] values;
protected Float4Writer(Directory dir, String id, AtomicLong bytesUsed) protected Float4Writer(Directory dir, String id, AtomicLong bytesUsed)
throws IOException { throws IOException {
super(dir, id, 4, bytesUsed); super(dir, id, 4, bytesUsed);
values = new int[1];
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
} }
@Override @Override
@ -154,76 +170,111 @@ public class Floats {
throws IOException { throws IOException {
assert docID > lastDocId : "docID: " + docID assert docID > lastDocId : "docID: " + docID
+ " must be greater than the last added doc id: " + lastDocId; + " must be greater than the last added doc id: " + lastDocId;
if (docID >= values.length) {
final long len = values.length;
values = ArrayUtil.grow(values, 1 + docID);
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
* ((values.length) - len));
}
values[docID] = Float.floatToRawIntBits((float)v);
lastDocId = docID;
}
@Override
protected void mergeDoc(int docID) throws IOException {
assert datOut != null;
assert docID > lastDocId : "docID: " + docID
+ " must be greater than the last added doc id: " + lastDocId;
if (docID - lastDocId > 1) { if (docID - lastDocId > 1) {
// fill with default values // fill with default values
lastDocId += fillDefault(docID - lastDocId - 1); fillDefault(docID - lastDocId - 1);
} }
assert datOut != null; assert datOut != null;
datOut.writeInt(Float.floatToRawIntBits((float) v)); datOut.writeInt(Float.floatToRawIntBits((float) floatsRef.get()));
++lastDocId; lastDocId = docID;
} }
@Override @Override
public void finish(int docCount) throws IOException { public void finish(int docCount) throws IOException {
boolean success = false;
try { try {
if (docCount > lastDocId + 1) int numDefaultsToAppend = docCount - (lastDocId + 1);
for (int i = lastDocId; i < docCount; i++) { if (datOut == null) {
datOut.writeInt(INT_DEFAULT); // default value initDataOut();
for (int i = 0; i <= lastDocId; i++) {
datOut.writeInt(values[i]);
} }
}
fillDefault(numDefaultsToAppend);
success = true;
} finally { } finally {
datOut.close(); bytesUsed.addAndGet(-(RamUsageEstimator.NUM_BYTES_INT
* ((values.length))));
values = null;
IOUtils.closeSafely(!success, datOut);
} }
} }
@Override
protected int fillDefault(int numValues) throws IOException {
for (int i = 0; i < numValues; i++) {
datOut.writeInt(INT_DEFAULT);
}
return numValues;
}
} }
// Writes 8 bytes (double) per value // Writes 8 bytes (double) per value
static class Float8Writer extends FloatsWriter { static class Float8Writer extends FloatsWriter {
private long[] values;
protected Float8Writer(Directory dir, String id, AtomicLong bytesUsed) protected Float8Writer(Directory dir, String id, AtomicLong bytesUsed)
throws IOException { throws IOException {
super(dir, id, 8, bytesUsed); super(dir, id, 8, bytesUsed);
values = new long[1];
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG);
} }
@Override @Override
public void add(int docID, double v) throws IOException { public void add(int docID, double v) throws IOException {
assert docID > lastDocId : "docID: " + docID assert docID > lastDocId : "docID: " + docID
+ " must be greater than the last added doc id: " + lastDocId; + " must be greater than the last added doc id: " + lastDocId;
if (docID >= values.length) {
final long len = values.length;
values = ArrayUtil.grow(values, 1 + docID);
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG
* ((values.length) - len));
}
values[docID] = Double.doubleToLongBits(v);
lastDocId = docID;
}
@Override
protected void mergeDoc(int docID) throws IOException {
assert docID > lastDocId : "docID: " + docID
+ " must be greater than the last added doc id: " + lastDocId;
if (docID - lastDocId > 1) { if (docID - lastDocId > 1) {
// fill with default values // fill with default values
lastDocId += fillDefault(docID - lastDocId - 1); lastDocId += fillDefault(docID - lastDocId - 1);
} }
assert datOut != null; assert datOut != null;
datOut.writeLong(Double.doubleToRawLongBits(v)); datOut.writeLong(Double.doubleToRawLongBits((float) floatsRef.get()));
++lastDocId; lastDocId = docID;
} }
@Override @Override
public void finish(int docCount) throws IOException { public void finish(int docCount) throws IOException {
boolean success = false;
try { try {
if (docCount > lastDocId + 1) int numDefaultsToAppend = docCount - (lastDocId + 1);
for (int i = lastDocId; i < docCount; i++) { if (datOut == null) {
datOut.writeLong(LONG_DEFAULT); // default value initDataOut();
for (int i = 0; i <= lastDocId; i++) {
datOut.writeLong(values[i]);
} }
}
fillDefault(numDefaultsToAppend);
success = true;
} finally { } finally {
datOut.close(); bytesUsed.addAndGet(-(RamUsageEstimator.NUM_BYTES_LONG
* ((values.length))));
values = null;
IOUtils.closeSafely(!success, datOut);
} }
} }
@Override
protected int fillDefault(int numValues) throws IOException {
for (int i = 0; i < numValues; i++) {
datOut.writeLong(LONG_DEFAULT);
}
return numValues;
}
} }
/** /**

View File

@ -157,7 +157,7 @@ class IntsImpl {
} }
@Override @Override
protected void add(int docID) throws IOException { protected void mergeDoc(int docID) throws IOException {
add(docID, intsRef.get()); add(docID, intsRef.get());
} }

View File

@ -113,11 +113,11 @@ public abstract class Writer extends DocValuesConsumer {
* the {@link Writer} implementation. The given document ID must always be * the {@link Writer} implementation. The given document ID must always be
* greater than the previous ID or <tt>0</tt> if called the first time. * greater than the previous ID or <tt>0</tt> if called the first time.
*/ */
protected abstract void add(int docID) throws IOException; protected abstract void mergeDoc(int docID) throws IOException;
/** /**
* Sets the next {@link ValuesEnum} to consume values from on calls to * Sets the next {@link ValuesEnum} to consume values from on calls to
* {@link #add(int)} * {@link #mergeDoc(int)}
* *
* @param valuesEnum * @param valuesEnum
* the next {@link ValuesEnum}, this must not be null * the next {@link ValuesEnum}, this must not be null
@ -159,7 +159,7 @@ public abstract class Writer extends DocValuesConsumer {
} }
} }
if (currentDocId == i) { // we are on the doc to merge if (currentDocId == i) { // we are on the doc to merge
add(docID); mergeDoc(docID);
} }
++docID; ++docID;
} }