mirror of https://github.com/apache/lucene.git
LUCENE-3216: keep float doc values in memory during indexing while merge directly to the target file
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1140044 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b15ca9a6b9
commit
28c15b9637
|
@ -387,7 +387,7 @@ public final class Bytes {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected void add(int docID) throws IOException {
|
||||
protected void mergeDoc(int docID) throws IOException {
|
||||
add(docID, bytesRef);
|
||||
}
|
||||
|
||||
|
|
|
@ -25,10 +25,12 @@ import org.apache.lucene.index.values.IndexDocValues.Source;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.CodecUtil;
|
||||
import org.apache.lucene.util.FloatsRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
* Exposes {@link Writer} and reader ({@link Source}) for 32 bit and 64 bit
|
||||
|
@ -44,11 +46,7 @@ public class Floats {
|
|||
private static final String CODEC_NAME = "SimpleFloats";
|
||||
static final int VERSION_START = 0;
|
||||
static final int VERSION_CURRENT = VERSION_START;
|
||||
private static final int INT_DEFAULT = Float
|
||||
.floatToRawIntBits(0.0f);
|
||||
private static final long LONG_DEFAULT = Double
|
||||
.doubleToRawLongBits(0.0d);
|
||||
|
||||
private static final byte[] DEFAULTS = new byte[] {0,0,0,0,0,0,0,0};
|
||||
|
||||
public static Writer getWriter(Directory dir, String id, int precisionBytes,
|
||||
AtomicLong bytesUsed) throws IOException {
|
||||
|
@ -70,16 +68,27 @@ public class Floats {
|
|||
|
||||
abstract static class FloatsWriter extends Writer {
|
||||
private final String id;
|
||||
private FloatsRef floatsRef;
|
||||
protected FloatsRef floatsRef;
|
||||
protected int lastDocId = -1;
|
||||
protected IndexOutput datOut;
|
||||
private final byte precision;
|
||||
private final Directory dir;
|
||||
|
||||
protected FloatsWriter(Directory dir, String id, int precision,
|
||||
AtomicLong bytesUsed) throws IOException {
|
||||
super(bytesUsed);
|
||||
this.id = id;
|
||||
this.precision = (byte) precision;
|
||||
this.dir = dir;
|
||||
|
||||
}
|
||||
|
||||
public long ramBytesUsed() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
final void initDataOut() throws IOException {
|
||||
assert datOut == null;
|
||||
datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
|
||||
Writer.DATA_EXTENSION));
|
||||
boolean success = false;
|
||||
|
@ -95,12 +104,8 @@ public class Floats {
|
|||
}
|
||||
}
|
||||
|
||||
public long ramBytesUsed() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void add(int docID) throws IOException {
|
||||
protected void mergeDoc(int docID) throws IOException {
|
||||
add(docID, floatsRef.get());
|
||||
}
|
||||
|
||||
|
@ -114,13 +119,20 @@ public class Floats {
|
|||
floatsRef = valuesEnum.getFloat();
|
||||
}
|
||||
|
||||
protected abstract int fillDefault(int num) throws IOException;
|
||||
protected final int fillDefault(int numValues) throws IOException {
|
||||
for (int i = 0; i < numValues; i++) {
|
||||
datOut.writeBytes(DEFAULTS, precision);
|
||||
}
|
||||
return numValues;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void merge(MergeState state) throws IOException {
|
||||
if (datOut == null) {
|
||||
initDataOut();
|
||||
}
|
||||
if (state.bits == null && state.reader instanceof FloatsReader) {
|
||||
// no deletes - bulk copy
|
||||
// TODO: should be do bulks with deletes too?
|
||||
final FloatsReader reader = (FloatsReader) state.reader;
|
||||
assert reader.precisionBytes == (int) precision;
|
||||
if (reader.maxDoc == 0)
|
||||
|
@ -131,10 +143,12 @@ public class Floats {
|
|||
lastDocId += fillDefault(docBase - lastDocId - 1);
|
||||
}
|
||||
lastDocId += reader.transferTo(datOut);
|
||||
} else
|
||||
} else {
|
||||
super.merge(state);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void files(Collection<String> files) throws IOException {
|
||||
files.add(IndexFileNames.segmentFileName(id, "", Writer.DATA_EXTENSION));
|
||||
|
@ -143,10 +157,12 @@ public class Floats {
|
|||
|
||||
// Writes 4 bytes (float) per value
|
||||
static class Float4Writer extends FloatsWriter {
|
||||
|
||||
private int[] values;
|
||||
protected Float4Writer(Directory dir, String id, AtomicLong bytesUsed)
|
||||
throws IOException {
|
||||
super(dir, id, 4, bytesUsed);
|
||||
values = new int[1];
|
||||
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -154,46 +170,80 @@ public class Floats {
|
|||
throws IOException {
|
||||
assert docID > lastDocId : "docID: " + docID
|
||||
+ " must be greater than the last added doc id: " + lastDocId;
|
||||
if (docID >= values.length) {
|
||||
final long len = values.length;
|
||||
values = ArrayUtil.grow(values, 1 + docID);
|
||||
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
|
||||
* ((values.length) - len));
|
||||
}
|
||||
values[docID] = Float.floatToRawIntBits((float)v);
|
||||
lastDocId = docID;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void mergeDoc(int docID) throws IOException {
|
||||
assert datOut != null;
|
||||
assert docID > lastDocId : "docID: " + docID
|
||||
+ " must be greater than the last added doc id: " + lastDocId;
|
||||
if (docID - lastDocId > 1) {
|
||||
// fill with default values
|
||||
lastDocId += fillDefault(docID - lastDocId - 1);
|
||||
fillDefault(docID - lastDocId - 1);
|
||||
}
|
||||
assert datOut != null;
|
||||
datOut.writeInt(Float.floatToRawIntBits((float) v));
|
||||
++lastDocId;
|
||||
datOut.writeInt(Float.floatToRawIntBits((float) floatsRef.get()));
|
||||
lastDocId = docID;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish(int docCount) throws IOException {
|
||||
boolean success = false;
|
||||
try {
|
||||
if (docCount > lastDocId + 1)
|
||||
for (int i = lastDocId; i < docCount; i++) {
|
||||
datOut.writeInt(INT_DEFAULT); // default value
|
||||
int numDefaultsToAppend = docCount - (lastDocId + 1);
|
||||
if (datOut == null) {
|
||||
initDataOut();
|
||||
for (int i = 0; i <= lastDocId; i++) {
|
||||
datOut.writeInt(values[i]);
|
||||
}
|
||||
}
|
||||
fillDefault(numDefaultsToAppend);
|
||||
success = true;
|
||||
} finally {
|
||||
datOut.close();
|
||||
bytesUsed.addAndGet(-(RamUsageEstimator.NUM_BYTES_INT
|
||||
* ((values.length))));
|
||||
values = null;
|
||||
IOUtils.closeSafely(!success, datOut);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int fillDefault(int numValues) throws IOException {
|
||||
for (int i = 0; i < numValues; i++) {
|
||||
datOut.writeInt(INT_DEFAULT);
|
||||
}
|
||||
return numValues;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Writes 8 bytes (double) per value
|
||||
static class Float8Writer extends FloatsWriter {
|
||||
|
||||
private long[] values;
|
||||
protected Float8Writer(Directory dir, String id, AtomicLong bytesUsed)
|
||||
throws IOException {
|
||||
super(dir, id, 8, bytesUsed);
|
||||
values = new long[1];
|
||||
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(int docID, double v) throws IOException {
|
||||
assert docID > lastDocId : "docID: " + docID
|
||||
+ " must be greater than the last added doc id: " + lastDocId;
|
||||
if (docID >= values.length) {
|
||||
final long len = values.length;
|
||||
values = ArrayUtil.grow(values, 1 + docID);
|
||||
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG
|
||||
* ((values.length) - len));
|
||||
}
|
||||
values[docID] = Double.doubleToLongBits(v);
|
||||
lastDocId = docID;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void mergeDoc(int docID) throws IOException {
|
||||
assert docID > lastDocId : "docID: " + docID
|
||||
+ " must be greater than the last added doc id: " + lastDocId;
|
||||
if (docID - lastDocId > 1) {
|
||||
|
@ -201,29 +251,30 @@ public class Floats {
|
|||
lastDocId += fillDefault(docID - lastDocId - 1);
|
||||
}
|
||||
assert datOut != null;
|
||||
datOut.writeLong(Double.doubleToRawLongBits(v));
|
||||
++lastDocId;
|
||||
datOut.writeLong(Double.doubleToRawLongBits((float) floatsRef.get()));
|
||||
lastDocId = docID;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish(int docCount) throws IOException {
|
||||
boolean success = false;
|
||||
try {
|
||||
if (docCount > lastDocId + 1)
|
||||
for (int i = lastDocId; i < docCount; i++) {
|
||||
datOut.writeLong(LONG_DEFAULT); // default value
|
||||
int numDefaultsToAppend = docCount - (lastDocId + 1);
|
||||
if (datOut == null) {
|
||||
initDataOut();
|
||||
for (int i = 0; i <= lastDocId; i++) {
|
||||
datOut.writeLong(values[i]);
|
||||
}
|
||||
}
|
||||
fillDefault(numDefaultsToAppend);
|
||||
success = true;
|
||||
} finally {
|
||||
datOut.close();
|
||||
bytesUsed.addAndGet(-(RamUsageEstimator.NUM_BYTES_LONG
|
||||
* ((values.length))));
|
||||
values = null;
|
||||
IOUtils.closeSafely(!success, datOut);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int fillDefault(int numValues) throws IOException {
|
||||
for (int i = 0; i < numValues; i++) {
|
||||
datOut.writeLong(LONG_DEFAULT);
|
||||
}
|
||||
return numValues;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -157,7 +157,7 @@ class IntsImpl {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected void add(int docID) throws IOException {
|
||||
protected void mergeDoc(int docID) throws IOException {
|
||||
add(docID, intsRef.get());
|
||||
}
|
||||
|
||||
|
|
|
@ -113,11 +113,11 @@ public abstract class Writer extends DocValuesConsumer {
|
|||
* the {@link Writer} implementation. The given document ID must always be
|
||||
* greater than the previous ID or <tt>0</tt> if called the first time.
|
||||
*/
|
||||
protected abstract void add(int docID) throws IOException;
|
||||
protected abstract void mergeDoc(int docID) throws IOException;
|
||||
|
||||
/**
|
||||
* Sets the next {@link ValuesEnum} to consume values from on calls to
|
||||
* {@link #add(int)}
|
||||
* {@link #mergeDoc(int)}
|
||||
*
|
||||
* @param valuesEnum
|
||||
* the next {@link ValuesEnum}, this must not be null
|
||||
|
@ -159,7 +159,7 @@ public abstract class Writer extends DocValuesConsumer {
|
|||
}
|
||||
}
|
||||
if (currentDocId == i) { // we are on the doc to merge
|
||||
add(docID);
|
||||
mergeDoc(docID);
|
||||
}
|
||||
++docID;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue