mirror of https://github.com/apache/lucene.git
LUCENE-3216: keep float doc values in memory during indexing while merging directly to the target file
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1140044 13f79535-47bb-0310-9956-ffa450edef68
commit 28c15b9637 (parent b15ca9a6b9)
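In outline, the change below makes Float4Writer and Float8Writer buffer doc values in memory while documents are being indexed: add() stores the raw bits in an int[]/long[] grown with ArrayUtil.grow and charges that memory to bytesUsed, the data file is only opened lazily via initDataOut(), finish() replays the buffer and pads missing documents with the shared DEFAULTS bytes, and merges skip the buffer entirely by writing each value straight to the target file through the renamed mergeDoc(int) hook (formerly add(int)). A minimal standalone sketch of the indexing-side half of that pattern, using hypothetical class and method names and plain java.io in place of Lucene's Directory/IndexOutput (only the buffer-then-flush idea is taken from the patch):

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;

// Hypothetical sketch: buffer float doc values in memory during indexing,
// then write them (plus trailing defaults) to the data output in finish().
class BufferedFloatValuesSketch {
  private int[] values = new int[1]; // raw float bits, indexed by docID
  private int lastDocId = -1;

  void add(int docID, float v) {
    if (docID >= values.length) {
      values = Arrays.copyOf(values, docID + 1); // stands in for ArrayUtil.grow
    }
    values[docID] = Float.floatToRawIntBits(v);
    lastDocId = docID;
  }

  // Opens the output only now; docs never seen get the default (0.0f).
  void finish(int docCount, DataOutputStream out) throws IOException {
    for (int i = 0; i <= lastDocId; i++) {
      out.writeInt(values[i]);
    }
    for (int i = lastDocId + 1; i < docCount; i++) {
      out.writeInt(Float.floatToRawIntBits(0.0f));
    }
    values = null; // release the buffer, as the patch does via bytesUsed
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    BufferedFloatValuesSketch w = new BufferedFloatValuesSketch();
    w.add(0, 1.5f);
    w.add(3, -2.0f); // docs 1 and 2 are missing and will get defaults
    w.finish(5, new DataOutputStream(bytes));
    System.out.println(bytes.size() + " bytes written"); // 5 docs * 4 bytes = 20
  }
}

The real writers additionally release the buffer through bytesUsed and close the output with IOUtils.closeSafely, as the finish() hunks below show.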
@@ -387,7 +387,7 @@ public final class Bytes {
     }
 
     @Override
-    protected void add(int docID) throws IOException {
+    protected void mergeDoc(int docID) throws IOException {
      add(docID, bytesRef);
    }
 
@@ -25,10 +25,12 @@ import org.apache.lucene.index.values.IndexDocValues.Source;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.util.FloatsRef;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.RamUsageEstimator;
 
 /**
  * Exposes {@link Writer} and reader ({@link Source}) for 32 bit and 64 bit
@@ -44,11 +46,7 @@ public class Floats {
   private static final String CODEC_NAME = "SimpleFloats";
   static final int VERSION_START = 0;
   static final int VERSION_CURRENT = VERSION_START;
-  private static final int INT_DEFAULT = Float
-      .floatToRawIntBits(0.0f);
-  private static final long LONG_DEFAULT = Double
-      .doubleToRawLongBits(0.0d);
-
+  private static final byte[] DEFAULTS = new byte[] {0,0,0,0,0,0,0,0};
 
   public static Writer getWriter(Directory dir, String id, int precisionBytes,
       AtomicLong bytesUsed) throws IOException {
@@ -70,16 +68,27 @@ public class Floats {
 
   abstract static class FloatsWriter extends Writer {
     private final String id;
-    private FloatsRef floatsRef;
+    protected FloatsRef floatsRef;
     protected int lastDocId = -1;
     protected IndexOutput datOut;
     private final byte precision;
+    private final Directory dir;
 
     protected FloatsWriter(Directory dir, String id, int precision,
         AtomicLong bytesUsed) throws IOException {
       super(bytesUsed);
       this.id = id;
       this.precision = (byte) precision;
+      this.dir = dir;
+
+    }
+
+    public long ramBytesUsed() {
+      return 0;
+    }
+
+    final void initDataOut() throws IOException {
+      assert datOut == null;
       datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
           Writer.DATA_EXTENSION));
       boolean success = false;
@@ -95,12 +104,8 @@ public class Floats {
       }
     }
 
-    public long ramBytesUsed() {
-      return 0;
-    }
-
     @Override
-    protected void add(int docID) throws IOException {
+    protected void mergeDoc(int docID) throws IOException {
       add(docID, floatsRef.get());
     }
 
@@ -114,13 +119,20 @@ public class Floats {
       floatsRef = valuesEnum.getFloat();
     }
 
-    protected abstract int fillDefault(int num) throws IOException;
+    protected final int fillDefault(int numValues) throws IOException {
+      for (int i = 0; i < numValues; i++) {
+        datOut.writeBytes(DEFAULTS, precision);
+      }
+      return numValues;
+    }
 
     @Override
     protected void merge(MergeState state) throws IOException {
+      if (datOut == null) {
+        initDataOut();
+      }
       if (state.bits == null && state.reader instanceof FloatsReader) {
         // no deletes - bulk copy
-        // TODO: should be do bulks with deletes too?
         final FloatsReader reader = (FloatsReader) state.reader;
         assert reader.precisionBytes == (int) precision;
         if (reader.maxDoc == 0)
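One detail in the hunk above: a single shared DEFAULTS array can replace both of the old per-type constants (INT_DEFAULT, LONG_DEFAULT) because 0.0f and 0.0d both encode as all-zero bits, so datOut.writeBytes(DEFAULTS, precision) emits exactly the bytes the old writeInt/writeLong default calls produced. A quick standalone check (plain Java, not part of the patch):

public class ZeroBitsCheck {
  public static void main(String[] args) {
    // Both zero values have an all-zero bit pattern, so one byte[8] of zeros
    // can serve as the on-disk default for 4-byte and 8-byte values alike.
    System.out.println(Float.floatToRawIntBits(0.0f));     // prints 0
    System.out.println(Double.doubleToRawLongBits(0.0d));  // prints 0
  }
}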
@@ -131,8 +143,10 @@ public class Floats {
           lastDocId += fillDefault(docBase - lastDocId - 1);
         }
         lastDocId += reader.transferTo(datOut);
-      } else
+      } else {
         super.merge(state);
+      }
+
     }
 
     @Override
@@ -143,10 +157,12 @@ public class Floats {
 
   // Writes 4 bytes (float) per value
   static class Float4Writer extends FloatsWriter {
+    private int[] values;
     protected Float4Writer(Directory dir, String id, AtomicLong bytesUsed)
         throws IOException {
       super(dir, id, 4, bytesUsed);
+      values = new int[1];
+      bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
     }
 
     @Override
@@ -154,76 +170,111 @@
         throws IOException {
       assert docID > lastDocId : "docID: " + docID
           + " must be greater than the last added doc id: " + lastDocId;
+      if (docID >= values.length) {
+        final long len = values.length;
+        values = ArrayUtil.grow(values, 1 + docID);
+        bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
+            * ((values.length) - len));
+      }
+      values[docID] = Float.floatToRawIntBits((float)v);
+      lastDocId = docID;
+    }
 
+    @Override
+    protected void mergeDoc(int docID) throws IOException {
+      assert datOut != null;
+      assert docID > lastDocId : "docID: " + docID
+          + " must be greater than the last added doc id: " + lastDocId;
       if (docID - lastDocId > 1) {
         // fill with default values
-        lastDocId += fillDefault(docID - lastDocId - 1);
+        fillDefault(docID - lastDocId - 1);
       }
       assert datOut != null;
-      datOut.writeInt(Float.floatToRawIntBits((float) v));
-      ++lastDocId;
+      datOut.writeInt(Float.floatToRawIntBits((float) floatsRef.get()));
+      lastDocId = docID;
     }
 
     @Override
     public void finish(int docCount) throws IOException {
+      boolean success = false;
       try {
-        if (docCount > lastDocId + 1)
-          for (int i = lastDocId; i < docCount; i++) {
-            datOut.writeInt(INT_DEFAULT); // default value
+        int numDefaultsToAppend = docCount - (lastDocId + 1);
+        if (datOut == null) {
+          initDataOut();
+          for (int i = 0; i <= lastDocId; i++) {
+            datOut.writeInt(values[i]);
           }
+        }
+        fillDefault(numDefaultsToAppend);
+        success = true;
       } finally {
-        datOut.close();
+        bytesUsed.addAndGet(-(RamUsageEstimator.NUM_BYTES_INT
+            * ((values.length))));
+        values = null;
+        IOUtils.closeSafely(!success, datOut);
       }
     }
 
-    @Override
-    protected int fillDefault(int numValues) throws IOException {
-      for (int i = 0; i < numValues; i++) {
-        datOut.writeInt(INT_DEFAULT);
-      }
-      return numValues;
-    }
   }
 
   // Writes 8 bytes (double) per value
   static class Float8Writer extends FloatsWriter {
+    private long[] values;
     protected Float8Writer(Directory dir, String id, AtomicLong bytesUsed)
         throws IOException {
       super(dir, id, 8, bytesUsed);
+      values = new long[1];
+      bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG);
     }
 
     @Override
     public void add(int docID, double v) throws IOException {
       assert docID > lastDocId : "docID: " + docID
           + " must be greater than the last added doc id: " + lastDocId;
+      if (docID >= values.length) {
+        final long len = values.length;
+        values = ArrayUtil.grow(values, 1 + docID);
+        bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG
+            * ((values.length) - len));
+      }
+      values[docID] = Double.doubleToLongBits(v);
+      lastDocId = docID;
+    }
+
+    @Override
+    protected void mergeDoc(int docID) throws IOException {
+      assert docID > lastDocId : "docID: " + docID
+          + " must be greater than the last added doc id: " + lastDocId;
       if (docID - lastDocId > 1) {
         // fill with default values
         lastDocId += fillDefault(docID - lastDocId - 1);
       }
       assert datOut != null;
-      datOut.writeLong(Double.doubleToRawLongBits(v));
-      ++lastDocId;
+      datOut.writeLong(Double.doubleToRawLongBits((float) floatsRef.get()));
+      lastDocId = docID;
     }
 
     @Override
     public void finish(int docCount) throws IOException {
+      boolean success = false;
       try {
-        if (docCount > lastDocId + 1)
-          for (int i = lastDocId; i < docCount; i++) {
-            datOut.writeLong(LONG_DEFAULT); // default value
+        int numDefaultsToAppend = docCount - (lastDocId + 1);
+        if (datOut == null) {
+          initDataOut();
+          for (int i = 0; i <= lastDocId; i++) {
+            datOut.writeLong(values[i]);
           }
+        }
+        fillDefault(numDefaultsToAppend);
+        success = true;
       } finally {
-        datOut.close();
+        bytesUsed.addAndGet(-(RamUsageEstimator.NUM_BYTES_LONG
+            * ((values.length))));
+        values = null;
+        IOUtils.closeSafely(!success, datOut);
      }
     }
 
-    @Override
-    protected int fillDefault(int numValues) throws IOException {
-      for (int i = 0; i < numValues; i++) {
-        datOut.writeLong(LONG_DEFAULT);
-      }
-      return numValues;
-    }
   }
 
   /**
@@ -157,7 +157,7 @@ class IntsImpl {
     }
 
     @Override
-    protected void add(int docID) throws IOException {
+    protected void mergeDoc(int docID) throws IOException {
      add(docID, intsRef.get());
    }
 
@@ -113,11 +113,11 @@ public abstract class Writer extends DocValuesConsumer {
    * the {@link Writer} implementation. The given document ID must always be
    * greater than the previous ID or <tt>0</tt> if called the first time.
    */
-  protected abstract void add(int docID) throws IOException;
+  protected abstract void mergeDoc(int docID) throws IOException;
 
   /**
    * Sets the next {@link ValuesEnum} to consume values from on calls to
-   * {@link #add(int)}
+   * {@link #mergeDoc(int)}
    *
    * @param valuesEnum
    *          the next {@link ValuesEnum}, this must not be null
@@ -159,7 +159,7 @@ public abstract class Writer extends DocValuesConsumer {
        }
      }
      if (currentDocId == i) { // we are on the doc to merge
-        add(docID);
+        mergeDoc(docID);
      }
      ++docID;
    }
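For context on the add(int) -> mergeDoc(int) rename: the base-class merge loop shown in the hunk above is the only caller of this hook. It walks the source segment's documents, skips deleted ones, and calls mergeDoc with the running target docID for each document that carries a value. A simplified self-contained rendering of that per-document loop, with hypothetical names and types (the real loop consumes a ValuesEnum from MergeState):

import java.util.BitSet;

// Simplified sketch of a per-document merge loop: copy live docs from a
// source segment into the target, assigning each surviving doc a new docID.
class MergeLoopSketch {
  interface PerDocWriter {
    void mergeDoc(int targetDocID, double value);
  }

  static int merge(double[] sourceValues, BitSet deleted, int targetDocBase,
                   PerDocWriter writer) {
    int docID = targetDocBase;
    for (int i = 0; i < sourceValues.length; i++) {
      if (deleted != null && deleted.get(i)) {
        continue; // deleted docs are skipped and get no slot in the target
      }
      writer.mergeDoc(docID, sourceValues[i]);
      ++docID;
    }
    return docID - targetDocBase; // number of documents merged
  }

  public static void main(String[] args) {
    BitSet deleted = new BitSet();
    deleted.set(1); // pretend doc 1 was deleted in the source segment
    int merged = merge(new double[] {1.0, 2.0, 3.0}, deleted, 10,
        (docID, v) -> System.out.println("doc " + docID + " -> " + v));
    System.out.println(merged + " docs merged"); // 2 docs merged
  }
}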