LUCENE-2795: genericize DirectIOLinuxDir -> NatuveUnixDirectory

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1237338 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-01-29 16:49:25 +00:00
parent 1b2f952ecc
commit 005fec5a58
5 changed files with 152 additions and 74 deletions

View File

@ -62,6 +62,14 @@ New Features
* LUCENE-3602: Added query time joining under the join module. (Martijn van Groningen, Michael McCandless)
* LUCENE-2795: Generified DirectIOLinuxDirectory to work across any
unix supporting the O_DIRECT flag when opening a file (tested on
Linux and OS X but likely other Unixes will work), and improved it
so it can be used for indexing and searching. The directory uses
direct IO when doing large merges to avoid unnecessarily evicting
cached IO pages due to large merges. (Varun Thacker, Mike
McCandless)
API Changes
* LUCENE-2606: Changed RegexCapabilities interface to fix thread

View File

@ -40,11 +40,13 @@
<fileset file="${src.dir}/org/apache/lucene/store/NativePosixUtil.cpp" />
<includepath>
<pathelement location="${java.home}/../include"/>
<pathelement location="${java.home}/include"/>
<pathelement location="${java.home}/../include/linux"/>
<pathelement location="${java.home}/../include/solaris"/>
</includepath>
<compilerarg value="-fPIC" />
<linkerarg value="-lstdc++" />
</cc>
</target>

View File

@ -15,6 +15,16 @@
* the License.
*/
#ifdef LINUX
#define DIRECT_FLAG O_DIRECT | O_NOATIME
#define LINUX
#elif __APPLE__
#define DIRECT_FLAG 0
#define OSX
#else
#define DIRECT_FLAG O_DIRECT // __unix__ is not used as even Linux falls under it.
#endif
#include <jni.h>
#include <fcntl.h> // posix_fadvise, constants for open
#include <string.h> // strerror
@ -26,6 +36,7 @@
// java -cp .:lib/junit-4.7.jar:./build/classes/test:./build/classes/java:./build/classes/demo -Dlucene.version=2.9-dev -DtempDir=build -ea org.junit.runner.JUnitCore org.apache.lucene.index.TestDoc
#ifdef LINUX
/*
* Class: org_apache_lucene_store_NativePosixUtil
* Method: posix_fadvise
@ -89,7 +100,7 @@ JNIEXPORT jint JNICALL Java_org_apache_lucene_store_NativePosixUtil_posix_1fadvi
return 0;
}
#endif
/*
* Class: org_apache_lucene_store_NativePosixUtil
@ -107,16 +118,26 @@ JNIEXPORT jobject JNICALL Java_org_apache_lucene_store_NativePosixUtil_open_1dir
char *fname;
class_ioex = env->FindClass("java/io/IOException");
if (class_ioex == NULL) return NULL;
if (class_ioex == NULL) {
return NULL;
}
class_fdesc = env->FindClass("java/io/FileDescriptor");
if (class_fdesc == NULL) return NULL;
if (class_fdesc == NULL) {
return NULL;
}
fname = (char *) env->GetStringUTFChars(filename, NULL);
if (readOnly) {
fd = open(fname, O_RDONLY | O_DIRECT | O_NOATIME);
fd = open(fname, O_RDONLY | DIRECT_FLAG);
#ifdef OSX
fcntl(fd, F_NOCACHE, 1);
#endif
} else {
fd = open(fname, O_RDWR | O_CREAT | O_DIRECT | O_NOATIME, 0666);
fd = open(fname, O_RDWR | O_CREAT | DIRECT_FLAG, 0666);
#ifdef OSX
fcntl(fd, F_NOCACHE, 1);
#endif
}
//printf("open %s -> %d; ro %d\n", fname, fd, readOnly); fflush(stdout);
@ -131,19 +152,22 @@ JNIEXPORT jobject JNICALL Java_org_apache_lucene_store_NativePosixUtil_open_1dir
// construct a new FileDescriptor
const_fdesc = env->GetMethodID(class_fdesc, "<init>", "()V");
if (const_fdesc == NULL) return NULL;
if (const_fdesc == NULL) {
return NULL;
}
ret = env->NewObject(class_fdesc, const_fdesc);
// poke the "fd" field with the file descriptor
field_fd = env->GetFieldID(class_fdesc, "fd", "I");
if (field_fd == NULL) return NULL;
if (field_fd == NULL) {
return NULL;
}
env->SetIntField(ret, field_fd, fd);
// and return it
return ret;
}
/*
* Class: org_apache_lucene_store_NativePosixUtil
* Method: pread

View File

@ -27,68 +27,120 @@ import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import org.apache.lucene.store.Directory; // javadoc
import org.apache.lucene.store.NativeFSLockFactory; // javadoc
import org.apache.lucene.store.IOContext.Context;
// TODO
// - newer Linux kernel versions (after 2.6.29) have
// improved MADV_SEQUENTIAL (and hopefully also
// FADV_SEQUENTIAL) interaction with the buffer
// cache; we should explore using that instead of direct
// IO when context is merge
/**
* An {@link Directory} implementation that uses the
* Linux-specific O_DIRECT flag to bypass all OS level
* caching. To use this you must compile
* A {@link Directory} implementation for all Unixes that uses
* DIRECT I/O to bypass OS level IO caching during
* merging. For all other cases (searching, writing) we delegate
* to the provided Directory instance.
*
* <p>See <a
* href="../../../../../contrib-misc/overview-summary.html#NativeUnixDirectory">Overview</a>
* for more details.
*
* <p>To use this you must compile
* NativePosixUtil.cpp (exposes Linux-specific APIs through
* JNI) for your platform.
* JNI) for your platform, by running <code>ant
* build-native-unix</code>, and then putting the resulting
* <code>libNativePosixUtil.so</code> (from
* <code>lucene/build/native</code>) onto your dynamic
* linker search path.
*
* <p><b>WARNING</b>: this code is very new and quite easily
* could contain horrible bugs. For example, here's one
* known issue: if you use seek in IndexOutput, and then
* known issue: if you use seek in <code>IndexOutput</code>, and then
* write more than one buffer's worth of bytes, then the
* file will be wrong. Lucene does not do this (only writes
* small number of bytes after seek).
* file will be wrong. Lucene does not do this today (only writes
* small number of bytes after seek), but that may change.
*
* <p>This directory passes Solr and Lucene tests on Linux
* and OS X; other Unixes should work but have not been
* tested! Use at your own risk.
*
* @lucene.experimental
*/
public class DirectIOLinuxDirectory extends FSDirectory {
public class NativeUnixDirectory extends FSDirectory {
// TODO: this is OS dependent, but likely 512 is the LCD
private final static long ALIGN = 512;
private final static long ALIGN_NOT_MASK = ~(ALIGN-1);
/** Default buffer size before writing to disk (256 MB);
* larger means less IO load but more RAM and direct
* buffer storage space consumed during merging. */
private final int forcedBufferSize;
public final static int DEFAULT_MERGE_BUFFER_SIZE = 262144;
/** Default min expected merge size before direct IO is
* used (10 MB): */
public final static long DEFAULT_MIN_BYTES_DIRECT = 10*1024*1024;
private final int mergeBufferSize;
private final long minBytesDirect;
private final Directory delegate;
/** Create a new NIOFSDirectory for the named location.
*
* @param path the path of the directory
* @param lockFactory the lock factory to use, or null for the default
* ({@link NativeFSLockFactory});
* @param forcedBufferSize if this is 0, just use Lucene's
* default buffer size; else, force this buffer size.
* For best performance, force the buffer size to
* something fairly large (eg 1 MB), but note that this
* will eat up the JRE's direct buffer storage space
* @param mergeBufferSize Size of buffer to use for
* merging. See {@link #DEFAULT_MERGE_BUFFER_SIZE}.
* @param minBytesDirect Merges, or files to be opened for
* reading, smaller than this will
* not use direct IO. See {@link
* #DEFAULT_MIN_BYTES_DIRECT}
* @param delegate fallback Directory for non-merges
* @throws IOException
*/
public DirectIOLinuxDirectory(File path, LockFactory lockFactory, int forcedBufferSize) throws IOException {
super(path, lockFactory);
this.forcedBufferSize = forcedBufferSize;
public NativeUnixDirectory(File path, int mergeBufferSize, long minBytesDirect, Directory delegate) throws IOException {
super(path, delegate.getLockFactory());
if ((mergeBufferSize & ALIGN) != 0) {
throw new IllegalArgumentException("mergeBufferSize must be 0 mod " + ALIGN + " (got: " + mergeBufferSize + ")");
}
this.mergeBufferSize = mergeBufferSize;
this.minBytesDirect = minBytesDirect;
this.delegate = delegate;
}
/** Create a new NIOFSDirectory for the named location.
*
* @param path the path of the directory
* @param delegate fallback Directory for non-merges
* @throws IOException
*/
public NativeUnixDirectory(File path, Directory delegate) throws IOException {
this(path, DEFAULT_MERGE_BUFFER_SIZE, DEFAULT_MIN_BYTES_DIRECT, delegate);
}
@Override
public IndexInput openInput(String name, IOContext context) throws IOException {
ensureOpen();
return new DirectIOLinuxIndexInput(new File(getDirectory(), name),
bufferSize(context));
if (context.context != Context.MERGE || context.mergeInfo.estimatedMergeBytes < minBytesDirect || fileLength(name) < minBytesDirect) {
return delegate.openInput(name, context);
} else {
return new NativeUnixIndexInput(new File(getDirectory(), name), mergeBufferSize);
}
}
@Override
public IndexOutput createOutput(String name, IOContext context) throws IOException {
ensureOpen();
ensureCanWrite(name);
return new DirectIOLinuxIndexOutput(new File(getDirectory(), name), bufferSize(context));
}
private int bufferSize(IOContext context) {
return forcedBufferSize != 0 ? forcedBufferSize : BufferedIndexInput
.bufferSize(context);
if (context.context != Context.MERGE || context.mergeInfo.estimatedMergeBytes < minBytesDirect) {
return delegate.createOutput(name, context);
} else {
ensureCanWrite(name);
return new NativeUnixIndexOutput(new File(getDirectory(), name), mergeBufferSize);
}
}
private final static class DirectIOLinuxIndexOutput extends IndexOutput {
private final static class NativeUnixIndexOutput extends IndexOutput {
private final ByteBuffer buffer;
private final FileOutputStream fos;
private final FileChannel channel;
@ -101,9 +153,9 @@ public class DirectIOLinuxDirectory extends FSDirectory {
private long fileLength;
private boolean isOpen;
public DirectIOLinuxIndexOutput(File path, int bufferSize) throws IOException {
public NativeUnixIndexOutput(File path, int bufferSize) throws IOException {
//this.path = path;
FileDescriptor fd = NativePosixUtil.open_direct(path.toString(), false);
final FileDescriptor fd = NativePosixUtil.open_direct(path.toString(), false);
fos = new FileOutputStream(fd);
//fos = new FileOutputStream(path);
channel = fos.getChannel();
@ -206,7 +258,7 @@ public class DirectIOLinuxDirectory extends FSDirectory {
@Override
public long length() throws IOException {
return fileLength;
return fileLength + bufferPos;
}
@Override
@ -233,7 +285,7 @@ public class DirectIOLinuxDirectory extends FSDirectory {
}
}
private final static class DirectIOLinuxIndexInput extends IndexInput {
private final static class NativeUnixIndexInput extends IndexInput {
private final ByteBuffer buffer;
private final FileInputStream fis;
private final FileChannel channel;
@ -244,10 +296,9 @@ public class DirectIOLinuxDirectory extends FSDirectory {
private long filePos;
private int bufferPos;
public DirectIOLinuxIndexInput(File path, int bufferSize) throws IOException {
// TODO make use of IOContext
super("DirectIOLinuxIndexInput(path=\"" + path.getPath() + "\")");
FileDescriptor fd = NativePosixUtil.open_direct(path.toString(), true);
public NativeUnixIndexInput(File path, int bufferSize) throws IOException {
super("NativeUnixIndexInput(path=\"" + path.getPath() + "\")");
final FileDescriptor fd = NativePosixUtil.open_direct(path.toString(), true);
fis = new FileInputStream(fd);
channel = fis.getChannel();
this.bufferSize = bufferSize;
@ -260,7 +311,7 @@ public class DirectIOLinuxDirectory extends FSDirectory {
}
// for clone
public DirectIOLinuxIndexInput(DirectIOLinuxIndexInput other) throws IOException {
public NativeUnixIndexInput(NativeUnixIndexInput other) throws IOException {
super(other.toString());
this.fis = null;
channel = other.channel;
@ -296,13 +347,17 @@ public class DirectIOLinuxDirectory extends FSDirectory {
public void seek(long pos) throws IOException {
if (pos != getFilePointer()) {
final long alignedPos = pos & ALIGN_NOT_MASK;
//System.out.println("seek pos=" + pos + " aligned=" + alignedPos + " bufferSize=" + bufferSize + " this=" + this);
filePos = alignedPos-bufferSize;
refill();
final int delta = (int) (pos - alignedPos);
buffer.position(delta);
bufferPos = delta;
if (delta != 0) {
refill();
buffer.position(delta);
bufferPos = delta;
} else {
// force refill on next read
bufferPos = bufferSize;
}
}
}
@ -371,7 +426,7 @@ public class DirectIOLinuxDirectory extends FSDirectory {
@Override
public Object clone() {
try {
return new DirectIOLinuxIndexInput(this);
return new NativeUnixIndexInput(this);
} catch (IOException ioe) {
throw new RuntimeException("IOException during clone: " + this, ioe);
}

View File

@ -27,33 +27,29 @@
The misc package has various tools for splitting/merging indices,
changing norms, finding high freq terms, and others.
<h2>DirectIOLinuxDirectory</h2>
<a name="NativeUnixDirectory"></a>
<h2>NativeUnixDirectory</h2>
<p>
<b>NOTE</b>: This uses C++ sources (accessible via JNI), which you'll
have to compile on your platform. Further, this is a very
platform-specific extensions (runs only on Linux, and likely only on
2.6.x kernels).
have to compile on your platform.
<p>
DirectIOLinuxDirectory is a Directory implementation that bypasses the
OS's buffer cache for any IndexInput and IndexOutput opened through it
(using the linux-specific O_DIRECT flag).
{@link NativeUnixDirectory} is a Directory implementation that bypasses the
OS's buffer cache (using direct IO) for any IndexInput and IndexOutput
used during merging of segments larger than a specified size (default
10 MB). This avoids evicting hot pages that are still in-use for
searching, keeping search more responsive while large merges run.
<p>
Note that doing so typically results in bad performance loss! You
should not use this for searching, but rather for indexing (or maybe
just merging during indexing), to avoid evicting useful pages from the
buffer cache.
See <a target=_top href="http://chbits.blogspot.com/2010/06/lucene-and-fadvisemadvise.html">here</a>
See <a target=_top href="http://blog.mikemccandless.com/2010/06/lucene-and-fadvisemadvise.html">this blog post</a>
for details.
Steps to build:
<ul>
<li> <tt>cd lucene/contrib/misc/</tt>
<li> To compile NativePosixUtil.cpp -> libNativePosixUtil.so on Linux run<tt> ant build-native-unix</tt>.
<li> To compile NativePosixUtil.cpp -> libNativePosixUtil.so, run<tt> ant build-native-unix</tt>.
<li><tt>libNativePosixUtil.so</tt> will be located in the <tt>lucene/build/native/</tt> folder
@ -62,13 +58,6 @@ Steps to build:
<li> <tt>ant jar</tt> to compile the java source and put that JAR on your CLASSPATH
</ul>
<p>
To use this, you'll likely want to make a custom subclass of
FSDirectory that only opens direct IndexInput/Output for merging. One
hackish way to do this is to check if the current thread's name starts
with "Lucene Merge Thread". Alternatively, you could use this Dir as
is for all indexing ops, but not for searching.
<p>
NativePosixUtil.cpp/java also expose access to the posix_madvise,
madvise, posix_fadvise functions, which are somewhat more cross