mirror of https://github.com/apache/lucene.git
LUCENE-2574: Optimize copies between IndexInput and Output
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@980656 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
66660307d9
commit
f4919e14b8
|
@ -204,7 +204,11 @@ Optimizations
|
|||
* LUCENE-2531: Fix issue when sorting by a String field that was
|
||||
causing too many fallbacks to compare-by-value (instead of by-ord).
|
||||
(Mike McCandless)
|
||||
|
||||
|
||||
* LUCENE-2574: IndexInput exposes copyBytes(IndexOutput, long) to allow for
|
||||
efficient copying by sub-classes. Optimized copy is implemented for RAM and FS
|
||||
streams. (Shai Erera)
|
||||
|
||||
======================= Lucene 3.x (not yet released) =======================
|
||||
|
||||
Changes in backwards compatibility policy
|
||||
|
|
|
@ -303,6 +303,17 @@ public class CompoundFileReader extends Directory {
|
|||
return length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyBytes(IndexOutput out, long numBytes) throws IOException {
|
||||
// Copy first whatever is in the buffer
|
||||
numBytes -= flushBuffer(out, numBytes);
|
||||
|
||||
// If there are more bytes left to copy, delegate the copy task to the
|
||||
// base IndexInput, in case it can do an optimized copy.
|
||||
if (numBytes > 0) {
|
||||
base.copyBytes(out, numBytes);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -26,9 +26,9 @@ public abstract class BufferedIndexInput extends IndexInput {
|
|||
public static final int BUFFER_SIZE = 1024;
|
||||
|
||||
private int bufferSize = BUFFER_SIZE;
|
||||
|
||||
|
||||
protected byte[] buffer;
|
||||
|
||||
|
||||
private long bufferStart = 0; // position in file of buffer
|
||||
private int bufferLength = 0; // end of valid bytes
|
||||
private int bufferPosition = 0; // next byte to read
|
||||
|
@ -205,4 +205,37 @@ public abstract class BufferedIndexInput extends IndexInput {
|
|||
return clone;
|
||||
}
|
||||
|
||||
/**
|
||||
* Flushes the in-memory bufer to the given output, copying at most
|
||||
* <code>numBytes</code>.
|
||||
* <p>
|
||||
* <b>NOTE:</b> this method does not refill the buffer, however it does
|
||||
* advance the buffer position.
|
||||
*
|
||||
* @return the number of bytes actually flushed from the in-memory buffer.
|
||||
*/
|
||||
protected int flushBuffer(IndexOutput out, long numBytes) throws IOException {
|
||||
int toCopy = bufferLength - bufferPosition;
|
||||
if (toCopy > numBytes) {
|
||||
toCopy = (int) numBytes;
|
||||
}
|
||||
if (toCopy > 0) {
|
||||
out.writeBytes(buffer, bufferPosition, toCopy);
|
||||
bufferPosition += toCopy;
|
||||
}
|
||||
return toCopy;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyBytes(IndexOutput out, long numBytes) throws IOException {
|
||||
assert numBytes >= 0: "numBytes=" + numBytes;
|
||||
|
||||
while (numBytes > 0) {
|
||||
if (bufferLength == bufferPosition) {
|
||||
refill();
|
||||
}
|
||||
numBytes -= flushBuffer(out, numBytes);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -215,26 +215,11 @@ public abstract class Directory implements Closeable {
|
|||
* overwrite it if it does.
|
||||
*/
|
||||
public void copy(Directory to, String src, String dest) throws IOException {
|
||||
IndexOutput os = null;
|
||||
IndexInput is = null;
|
||||
IndexOutput os = to.createOutput(dest);
|
||||
IndexInput is = openInput(src);
|
||||
IOException priorException = null;
|
||||
int bufSize = BufferedIndexOutput.BUFFER_SIZE;
|
||||
byte[] buf = new byte[bufSize];
|
||||
try {
|
||||
// create file in dest directory
|
||||
os = to.createOutput(dest);
|
||||
// read current file
|
||||
is = openInput(src);
|
||||
// and copy to dest directory
|
||||
long len = is.length();
|
||||
long numRead = 0;
|
||||
while (numRead < len) {
|
||||
long left = len - numRead;
|
||||
int toRead = (int) (bufSize < left ? bufSize : left);
|
||||
is.readBytes(buf, 0, toRead);
|
||||
os.writeBytes(buf, toRead);
|
||||
numRead += toRead;
|
||||
}
|
||||
is.copyBytes(os, is.length());
|
||||
} catch (IOException ioe) {
|
||||
priorException = ioe;
|
||||
} finally {
|
||||
|
|
|
@ -18,9 +18,7 @@ package org.apache.lucene.store;
|
|||
*/
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.FilenameFilter;
|
||||
import java.io.IOException;
|
||||
import java.io.RandomAccessFile;
|
||||
|
@ -35,7 +33,6 @@ import java.util.HashSet;
|
|||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.store.SimpleFSDirectory.SimpleFSIndexInput;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.ThreadInterruptedException;
|
||||
import org.apache.lucene.util.Constants;
|
||||
|
||||
|
@ -438,47 +435,6 @@ public abstract class FSDirectory extends Directory {
|
|||
return chunkSize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(Directory to, String src, String dest) throws IOException {
|
||||
if (to instanceof FSDirectory) {
|
||||
FSDirectory target = (FSDirectory) to;
|
||||
target.ensureCanWrite(dest);
|
||||
FileChannel input = null;
|
||||
FileChannel output = null;
|
||||
IOException priorException = null;
|
||||
try {
|
||||
input = new FileInputStream(new File(directory, src)).getChannel();
|
||||
output = new FileOutputStream(new File(target.directory, dest)).getChannel();
|
||||
copy(input, output, input.size());
|
||||
} catch (IOException ioe) {
|
||||
priorException = ioe;
|
||||
} finally {
|
||||
IOUtils.closeSafely(priorException, input, output);
|
||||
}
|
||||
} else {
|
||||
super.copy(to, src, dest);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies the content of a given {@link FileChannel} to a destination one. The
|
||||
* copy is done in chunks of 2MB because if transferFrom is used without a
|
||||
* limit when copying a very large file, then an OOM may be thrown (depends on
|
||||
* the state of the RAM in the machine, as well as the OS used). Performance
|
||||
* measurements showed that chunk sizes larger than 2MB do not result in much
|
||||
* faster file copy, therefore we limit the size to be safe with different
|
||||
* file sizes and systems.
|
||||
*/
|
||||
static void copy(FileChannel input, FileChannel output, long numBytes) throws IOException {
|
||||
long pos = output.position();
|
||||
long writeTo = numBytes + pos;
|
||||
while (pos < writeTo) {
|
||||
pos += output.transferFrom(input, pos, Math.min(CHANNEL_CHUNK_SIZE, writeTo - pos));
|
||||
}
|
||||
// transferFrom does not change the position of the channel. Need to change it manually
|
||||
output.position(pos);
|
||||
}
|
||||
|
||||
protected static class FSIndexOutput extends BufferedIndexOutput {
|
||||
private final FSDirectory parent;
|
||||
private final String name;
|
||||
|
@ -501,23 +457,37 @@ public abstract class FSDirectory extends Directory {
|
|||
@Override
|
||||
public void copyBytes(DataInput input, long numBytes) throws IOException {
|
||||
// Optimized copy only if the number of bytes to copy is larger than the
|
||||
// buffer size, and the given IndexInput supports FileChannel copying ..
|
||||
// buffer size, and the given IndexInput supports FileChannel copying.
|
||||
// NOTE: the below check relies on NIOIndexInput extending Simple. If that
|
||||
// changes in the future, we should change the check as well.
|
||||
if (numBytes > BUFFER_SIZE && input instanceof SimpleFSIndexInput) {
|
||||
// flush any bytes in the buffer
|
||||
flush();
|
||||
// do the optimized copy
|
||||
FileChannel in = ((SimpleFSIndexInput) input).file.getChannel();
|
||||
FileChannel out = file.getChannel();
|
||||
copy(in, out, numBytes);
|
||||
// corrects the position in super (BufferedIndexOutput), so that calls
|
||||
// to getFilePointer will return the correct pointer.
|
||||
// Perhaps a specific method is better?
|
||||
super.seek(out.position());
|
||||
} else {
|
||||
if (numBytes <= BUFFER_SIZE || !(input instanceof SimpleFSIndexInput)) {
|
||||
super.copyBytes(input, numBytes);
|
||||
return;
|
||||
}
|
||||
|
||||
SimpleFSIndexInput fsInput = (SimpleFSIndexInput) input;
|
||||
|
||||
// flush any bytes in the buffer
|
||||
flush();
|
||||
|
||||
// flush any bytes in the input's buffer.
|
||||
numBytes -= fsInput.flushBuffer(this, numBytes);
|
||||
|
||||
// do the optimized copy
|
||||
FileChannel in = fsInput.file.getChannel();
|
||||
FileChannel out = file.getChannel();
|
||||
long pos = out.position();
|
||||
long writeTo = numBytes + pos;
|
||||
while (pos < writeTo) {
|
||||
pos += out.transferFrom(in, pos, Math.min(CHANNEL_CHUNK_SIZE, writeTo - pos));
|
||||
}
|
||||
// transferFrom does not change the position of the channel. Need to change it manually
|
||||
out.position(pos);
|
||||
|
||||
// corrects the position in super (BufferedIndexOutput), so that calls
|
||||
// to getFilePointer will return the correct pointer.
|
||||
// Perhaps a specific method is better?
|
||||
super.seek(out.position());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -25,6 +25,9 @@ import java.io.IOException;
|
|||
* @see Directory
|
||||
*/
|
||||
public abstract class IndexInput extends DataInput implements Cloneable,Closeable {
|
||||
|
||||
protected byte[] copyBuf = null;
|
||||
|
||||
/** Closes the stream to further operations. */
|
||||
public abstract void close() throws IOException;
|
||||
|
||||
|
@ -41,4 +44,31 @@ public abstract class IndexInput extends DataInput implements Cloneable,Closeabl
|
|||
|
||||
/** The number of bytes in the file. */
|
||||
public abstract long length();
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies <code>numBytes</code> bytes to the given {@link IndexOutput}.
|
||||
* <p>
|
||||
* <b>NOTE:</b> this method uses an intermediate buffer to copy the bytes.
|
||||
* Consider overriding it in your implementation, if you can make a better,
|
||||
* optimized copy.
|
||||
* <p>
|
||||
* <b>NOTE</b> ensure that there are enough bytes in the input to copy to
|
||||
* output. Otherwise, different exceptions may be thrown, depending on the
|
||||
* implementation.
|
||||
*/
|
||||
public void copyBytes(IndexOutput out, long numBytes) throws IOException {
|
||||
assert numBytes >= 0: "numBytes=" + numBytes;
|
||||
|
||||
if (copyBuf == null) {
|
||||
copyBuf = new byte[BufferedIndexInput.BUFFER_SIZE];
|
||||
}
|
||||
|
||||
while (numBytes > 0) {
|
||||
final int toCopy = (int) (numBytes > copyBuf.length ? copyBuf.length : numBytes);
|
||||
readBytes(copyBuf, 0, toCopy);
|
||||
out.writeBytes(copyBuf, 0, toCopy);
|
||||
numBytes -= toCopy;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
package org.apache.lucene.store;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -19,10 +17,9 @@ import java.io.IOException;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A memory-resident {@link IndexInput} implementation.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
|
||||
/** A memory-resident {@link IndexInput} implementation. */
|
||||
class RAMInputStream extends IndexInput implements Cloneable {
|
||||
static final int BUFFER_SIZE = RAMOutputStream.BUFFER_SIZE;
|
||||
|
||||
|
@ -104,6 +101,27 @@ class RAMInputStream extends IndexInput implements Cloneable {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyBytes(IndexOutput out, long numBytes) throws IOException {
|
||||
assert numBytes >= 0: "numBytes=" + numBytes;
|
||||
|
||||
long left = numBytes;
|
||||
while (left > 0) {
|
||||
if (bufferPosition == bufferLength) {
|
||||
++currentBufferIndex;
|
||||
switchCurrentBuffer(true);
|
||||
}
|
||||
|
||||
final int bytesInBuffer = bufferLength - bufferPosition;
|
||||
final int toCopy = (int) (bytesInBuffer < left ? bytesInBuffer : left);
|
||||
out.writeBytes(currentBuffer, bufferPosition, toCopy);
|
||||
bufferPosition += toCopy;
|
||||
left -= toCopy;
|
||||
}
|
||||
|
||||
assert left == 0: "Insufficient bytes to copy: numBytes=" + numBytes + " copied=" + (numBytes - left);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getFilePointer() {
|
||||
return currentBufferIndex < 0 ? 0 : bufferStart + bufferPosition;
|
||||
|
|
|
@ -24,7 +24,6 @@ import java.io.IOException;
|
|||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
|
||||
public class RAMOutputStream extends IndexOutput {
|
||||
static final int BUFFER_SIZE = 1024;
|
||||
|
||||
|
@ -161,4 +160,26 @@ public class RAMOutputStream extends IndexOutput {
|
|||
public long sizeInBytes() {
|
||||
return file.numBuffers() * BUFFER_SIZE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyBytes(DataInput input, long numBytes) throws IOException {
|
||||
assert numBytes >= 0: "numBytes=" + numBytes;
|
||||
|
||||
while (numBytes > 0) {
|
||||
if (bufferPosition == bufferLength) {
|
||||
currentBufferIndex++;
|
||||
switchCurrentBuffer();
|
||||
}
|
||||
|
||||
int toCopy = currentBuffer.length - bufferPosition;
|
||||
if (numBytes < toCopy) {
|
||||
toCopy = (int) numBytes;
|
||||
}
|
||||
input.readBytes(currentBuffer, bufferPosition, toCopy, false);
|
||||
numBytes -= toCopy;
|
||||
bufferPosition += toCopy;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -160,5 +160,12 @@ public class SimpleFSDirectory extends FSDirectory {
|
|||
boolean isFDValid() throws IOException {
|
||||
return file.getFD().valid();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyBytes(IndexOutput out, long numBytes) throws IOException {
|
||||
numBytes -= flushBuffer(out, numBytes);
|
||||
// If out is FSIndexOutput, the copy will be optimized
|
||||
out.copyBytes(this, numBytes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue