mirror of https://github.com/apache/lucene.git
Add support for posix_madvise to Java 21 MMapDirectory (#13196)
This commit is contained in:
parent
f4db67fae2
commit
a4055dae62
|
@ -132,6 +132,8 @@ allprojects {
|
|||
if (rootProject.vectorIncubatorJavaVersions.contains(rootProject.runtimeJavaVersion)) {
|
||||
jvmArgs '--add-modules', 'jdk.incubator.vector'
|
||||
}
|
||||
|
||||
jvmArgs '--enable-native-access=' + (project.path == ':lucene:core' ? 'ALL-UNNAMED' : 'org.apache.lucene.core')
|
||||
|
||||
def loggingConfigFile = layout.projectDirectory.file("${resources}/logging.properties")
|
||||
def tempDir = layout.projectDirectory.dir(testsTmpDir.toString())
|
||||
|
|
|
@ -197,6 +197,11 @@ New Features
|
|||
* GITHUB#12915: Add new token filters for Japanese sutegana (捨て仮名). This introduces JapaneseHiraganaUppercaseFilter
|
||||
and JapaneseKatakanaUppercaseFilter. (Dai Sugimori)
|
||||
|
||||
* GITHUB#13196: Add support for posix_madvise to MMapDirectory: If running on Linux/macOS and Java 21
|
||||
or later, MMapDirectory uses IOContext to pass suitable MADV flags to the kernel of the operating system.
|
||||
This may improve paging logic especially when large segments are merged under memory pressure.
|
||||
(Uwe Schindler, Chris Hegarty, Robert Muir, Adrien Grand)
|
||||
|
||||
Improvements
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -46,24 +46,32 @@ public class IOContext {
|
|||
/** This flag indicates that the file will be opened, then fully read sequentially then closed. */
|
||||
public final boolean readOnce;
|
||||
|
||||
/**
|
||||
* This flag indicates that the file will be accessed randomly. If this flag is set, then readOnce
|
||||
* will be false.
|
||||
*/
|
||||
public final boolean randomAccess;
|
||||
|
||||
/**
|
||||
* This flag is used for files that are a small fraction of the total index size and are expected
|
||||
* to be heavily accessed in random-access fashion. Some {@link Directory} implementations may
|
||||
* choose to load such files into physical memory (e.g. Java heap) as a way to provide stronger
|
||||
* guarantees on query latency.
|
||||
* guarantees on query latency. If this flag is set, then {@link #randomAccess} will be true.
|
||||
*/
|
||||
public final boolean load;
|
||||
|
||||
public static final IOContext DEFAULT = new IOContext(Context.DEFAULT);
|
||||
|
||||
public static final IOContext READONCE = new IOContext(true, false);
|
||||
public static final IOContext READONCE = new IOContext(true, false, false);
|
||||
|
||||
public static final IOContext READ = new IOContext(false, false);
|
||||
public static final IOContext READ = new IOContext(false, false, false);
|
||||
|
||||
public static final IOContext LOAD = new IOContext(false, true);
|
||||
public static final IOContext LOAD = new IOContext(false, true, true);
|
||||
|
||||
public static final IOContext RANDOM = new IOContext(false, false, true);
|
||||
|
||||
public IOContext() {
|
||||
this(false, false);
|
||||
this(false, false, false);
|
||||
}
|
||||
|
||||
public IOContext(FlushInfo flushInfo) {
|
||||
|
@ -72,6 +80,7 @@ public class IOContext {
|
|||
this.mergeInfo = null;
|
||||
this.readOnce = false;
|
||||
this.load = false;
|
||||
this.randomAccess = false;
|
||||
this.flushInfo = flushInfo;
|
||||
}
|
||||
|
||||
|
@ -79,11 +88,18 @@ public class IOContext {
|
|||
this(context, null);
|
||||
}
|
||||
|
||||
private IOContext(boolean readOnce, boolean load) {
|
||||
/**
 * Creates a {@link Context#READ} context carrying the given access hints. Neither merge nor flush
 * information is attached.
 *
 * @param readOnce whether the file is read once, sequentially
 * @param load whether the file should be loaded; only allowed together with {@code randomAccess}
 * @param randomAccess whether the file is accessed randomly; not allowed together with {@code
 *     readOnce}
 * @throws IllegalArgumentException if the combination of flags is contradictory
 */
private IOContext(boolean readOnce, boolean load, boolean randomAccess) {
  // Validate the flag combination first; the two failure cases are mutually exclusive.
  if (randomAccess) {
    if (readOnce) {
      throw new IllegalArgumentException("cannot be both readOnce and randomAccess");
    }
  } else if (load) {
    throw new IllegalArgumentException("cannot be load but not randomAccess");
  }

  // access hints
  this.readOnce = readOnce;
  this.load = load;
  this.randomAccess = randomAccess;

  // fixed parts of a plain read context
  this.context = Context.READ;
  this.mergeInfo = null;
  this.flushInfo = null;
}
|
||||
|
||||
|
@ -98,6 +114,7 @@ public class IOContext {
|
|||
this.context = context;
|
||||
this.readOnce = false;
|
||||
this.load = false;
|
||||
this.randomAccess = false;
|
||||
this.mergeInfo = mergeInfo;
|
||||
this.flushInfo = null;
|
||||
}
|
||||
|
@ -115,12 +132,13 @@ public class IOContext {
|
|||
this.mergeInfo = ctxt.mergeInfo;
|
||||
this.flushInfo = ctxt.flushInfo;
|
||||
this.readOnce = readOnce;
|
||||
this.randomAccess = ctxt.randomAccess;
|
||||
this.load = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(context, flushInfo, mergeInfo, readOnce, load);
|
||||
return Objects.hash(context, flushInfo, mergeInfo, readOnce, load, randomAccess);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -134,6 +152,7 @@ public class IOContext {
|
|||
if (!Objects.equals(mergeInfo, other.mergeInfo)) return false;
|
||||
if (readOnce != other.readOnce) return false;
|
||||
if (load != other.load) return false;
|
||||
if (randomAccess != other.randomAccess) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -147,6 +166,10 @@ public class IOContext {
|
|||
+ flushInfo
|
||||
+ ", readOnce="
|
||||
+ readOnce
|
||||
+ ", load="
|
||||
+ load
|
||||
+ ", randomAccess="
|
||||
+ randomAccess
|
||||
+ "]";
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,6 +48,13 @@ import org.apache.lucene.util.Constants;
|
|||
* of box with some compilation tricks. For more information about the foreign memory API read
|
||||
* documentation of the {@link java.lang.foreign} package.
|
||||
*
|
||||
* <p>On some platforms like Linux and MacOS X, this class will invoke the syscall {@code madvise()}
|
||||
* to advise how the OS kernel should handle paging after opening a file. For this to work, Java code
|
||||
* must be able to call native code. If this is not allowed, a warning is logged. To enable native
|
||||
* access for Lucene in a modularized application, pass {@code
|
||||
* --enable-native-access=org.apache.lucene.core} to the Java command line. If Lucene is running in
|
||||
* a classpath-based application, use {@code --enable-native-access=ALL-UNNAMED}.
|
||||
*
|
||||
* <p><b>NOTE:</b> Accessing this class either directly or indirectly from a thread while it's
|
||||
* interrupted can close the underlying channel immediately if at the same time the thread is
|
||||
* blocked on IO. The channel will remain closed and subsequent access to {@link MMapDirectory} will
|
||||
|
@ -204,6 +211,8 @@ public class MMapDirectory extends FSDirectory {
|
|||
|
||||
long getDefaultMaxChunkSize();
|
||||
|
||||
boolean supportsMadvise();
|
||||
|
||||
default IOException convertMapFailedIOException(
|
||||
IOException ioe, String resourceDescription, long bufSize) {
|
||||
final String originalMessage;
|
||||
|
@ -269,6 +278,14 @@ public class MMapDirectory extends FSDirectory {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true, if MMapDirectory uses the platform's {@code madvise()} syscall to advise how OS
|
||||
* kernel should handle paging after opening a file.
|
||||
*/
|
||||
public static boolean supportsMadvise() {
|
||||
return PROVIDER.supportsMadvise();
|
||||
}
|
||||
|
||||
static {
|
||||
PROVIDER = lookupProvider();
|
||||
DEFAULT_MAX_CHUNK_SIZE = PROVIDER.getDefaultMaxChunkSize();
|
||||
|
|
|
@ -23,12 +23,19 @@ import java.nio.channels.FileChannel;
|
|||
import java.nio.channels.FileChannel.MapMode;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.Optional;
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.apache.lucene.util.Unwrappable;
|
||||
|
||||
@SuppressWarnings("preview")
|
||||
final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexInputProvider {
|
||||
|
||||
private final Optional<NativeAccess> nativeAccess;
|
||||
|
||||
/** Creates the provider and looks up the {@link NativeAccess} implementation, if there is one. */
MemorySegmentIndexInputProvider() {
  nativeAccess = NativeAccess.getImplementation();
}
|
||||
|
||||
@Override
|
||||
public IndexInput openInput(Path path, IOContext context, int chunkSizePower, boolean preload)
|
||||
throws IOException {
|
||||
|
@ -45,7 +52,7 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn
|
|||
MemorySegmentIndexInput.newInstance(
|
||||
resourceDescription,
|
||||
arena,
|
||||
map(arena, resourceDescription, fc, chunkSizePower, preload, fileSize),
|
||||
map(arena, resourceDescription, fc, context, chunkSizePower, preload, fileSize),
|
||||
fileSize,
|
||||
chunkSizePower);
|
||||
success = true;
|
||||
|
@ -62,10 +69,16 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn
|
|||
return Constants.JRE_IS_64BIT ? (1L << 34) : (1L << 28);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean supportsMadvise() {
|
||||
return nativeAccess.isPresent();
|
||||
}
|
||||
|
||||
private final MemorySegment[] map(
|
||||
Arena arena,
|
||||
String resourceDescription,
|
||||
FileChannel fc,
|
||||
IOContext context,
|
||||
int chunkSizePower,
|
||||
boolean preload,
|
||||
long length)
|
||||
|
@ -90,8 +103,12 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn
|
|||
} catch (IOException ioe) {
|
||||
throw convertMapFailedIOException(ioe, resourceDescription, segSize);
|
||||
}
|
||||
// if preload apply it without madvise.
|
||||
// if chunk size is too small (less than 2 MiB), disable madvise support (incorrect alignment)
|
||||
if (preload) {
|
||||
segment.load();
|
||||
} else if (nativeAccess.isPresent() && chunkSizePower >= 21) {
|
||||
nativeAccess.get().madvise(segment, context);
|
||||
}
|
||||
segments[segNr] = segment;
|
||||
startOffset += segSize;
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.store;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.util.Optional;
|
||||
import org.apache.lucene.util.Constants;
|
||||
|
||||
@SuppressWarnings("preview")
|
||||
abstract class NativeAccess {
|
||||
|
||||
/** Invoke the {@code madvise} call for the given {@link MemorySegment}. */
|
||||
public abstract void madvise(MemorySegment segment, IOContext context) throws IOException;
|
||||
|
||||
/**
|
||||
* Return the NativeAccess instance for this platform. At moment we only support Linux and MacOS
|
||||
*/
|
||||
public static Optional<NativeAccess> getImplementation() {
|
||||
if (Constants.LINUX || Constants.MAC_OS_X) {
|
||||
return PosixNativeAccess.getInstance();
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,156 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.store;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.foreign.FunctionDescriptor;
|
||||
import java.lang.foreign.Linker;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.lang.foreign.SymbolLookup;
|
||||
import java.lang.foreign.ValueLayout;
|
||||
import java.lang.invoke.MethodHandle;
|
||||
import java.util.Locale;
|
||||
import java.util.Optional;
|
||||
import java.util.logging.Logger;
|
||||
import org.apache.lucene.store.IOContext.Context;
|
||||
|
||||
@SuppressWarnings("preview")
|
||||
final class PosixNativeAccess extends NativeAccess {
|
||||
|
||||
private static final Logger LOG = Logger.getLogger(PosixNativeAccess.class.getName());
|
||||
|
||||
// these constants were extracted from glibc and macos header files - luckily they are the same:
|
||||
|
||||
/** No further special treatment. */
|
||||
public static final int POSIX_MADV_NORMAL = 0;
|
||||
|
||||
/** Expect random page references. */
|
||||
public static final int POSIX_MADV_RANDOM = 1;
|
||||
|
||||
/** Expect sequential page references. */
|
||||
public static final int POSIX_MADV_SEQUENTIAL = 2;
|
||||
|
||||
/** Will need these pages. */
|
||||
public static final int POSIX_MADV_WILLNEED = 3;
|
||||
|
||||
/** Don't need these pages. */
|
||||
public static final int POSIX_MADV_DONTNEED = 4;
|
||||
|
||||
private static final MethodHandle MH$posix_madvise;
|
||||
|
||||
private static final Optional<NativeAccess> INSTANCE;
|
||||
|
||||
private PosixNativeAccess() {}
|
||||
|
||||
static Optional<NativeAccess> getInstance() {
|
||||
return INSTANCE;
|
||||
}
|
||||
|
||||
static {
|
||||
MethodHandle adviseHandle = null;
|
||||
PosixNativeAccess instance = null;
|
||||
try {
|
||||
adviseHandle = lookupMadvise();
|
||||
instance = new PosixNativeAccess();
|
||||
} catch (UnsupportedOperationException uoe) {
|
||||
LOG.warning(uoe.getMessage());
|
||||
} catch (
|
||||
@SuppressWarnings("unused")
|
||||
IllegalCallerException ice) {
|
||||
LOG.warning(
|
||||
String.format(
|
||||
Locale.ENGLISH,
|
||||
"Lucene has no access to native functions. To enable access to native functions, "
|
||||
+ "pass the following on command line: --enable-native-access=%s",
|
||||
Optional.ofNullable(PosixNativeAccess.class.getModule().getName())
|
||||
.orElse("ALL-UNNAMED")));
|
||||
}
|
||||
MH$posix_madvise = adviseHandle;
|
||||
INSTANCE = Optional.ofNullable(instance);
|
||||
}
|
||||
|
||||
private static MethodHandle lookupMadvise() {
|
||||
final Linker linker = Linker.nativeLinker();
|
||||
final SymbolLookup stdlib = linker.defaultLookup();
|
||||
final MethodHandle mh =
|
||||
findFunction(
|
||||
linker,
|
||||
stdlib,
|
||||
"posix_madvise",
|
||||
FunctionDescriptor.of(
|
||||
ValueLayout.JAVA_INT,
|
||||
ValueLayout.ADDRESS,
|
||||
ValueLayout.JAVA_LONG,
|
||||
ValueLayout.JAVA_INT));
|
||||
LOG.info("posix_madvise() available on this platform");
|
||||
return mh;
|
||||
}
|
||||
|
||||
private static MethodHandle findFunction(
|
||||
Linker linker, SymbolLookup lookup, String name, FunctionDescriptor desc) {
|
||||
final MemorySegment symbol =
|
||||
lookup
|
||||
.find(name)
|
||||
.orElseThrow(
|
||||
() ->
|
||||
new UnsupportedOperationException(
|
||||
"Platform has no symbol for '" + name + "' in libc."));
|
||||
return linker.downcallHandle(symbol, desc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void madvise(MemorySegment segment, IOContext context) throws IOException {
|
||||
// Note: madvise is bypassed if the segment should be preloaded via MemorySegment#load.
|
||||
if (segment.byteSize() == 0L) {
|
||||
return; // empty segments should be excluded, because they may have no address at all
|
||||
}
|
||||
final Integer advice = mapIOContext(context);
|
||||
if (advice == null) {
|
||||
return; // do nothing
|
||||
}
|
||||
final int ret;
|
||||
try {
|
||||
ret = (int) MH$posix_madvise.invokeExact(segment, segment.byteSize(), advice.intValue());
|
||||
} catch (Throwable th) {
|
||||
throw new AssertionError(th);
|
||||
}
|
||||
if (ret != 0) {
|
||||
throw new IOException(
|
||||
String.format(
|
||||
Locale.ENGLISH,
|
||||
"Call to posix_madvise with address=0x%08X and byteSize=%d failed with return code %d.",
|
||||
segment.address(),
|
||||
segment.byteSize(),
|
||||
ret));
|
||||
}
|
||||
}
|
||||
|
||||
private Integer mapIOContext(IOContext ctx) {
|
||||
// Merging always wins and implies sequential access, because kernel is advised to free pages
|
||||
// after use:
|
||||
if (ctx.context == Context.MERGE) {
|
||||
return POSIX_MADV_SEQUENTIAL;
|
||||
}
|
||||
if (ctx.randomAccess) {
|
||||
return POSIX_MADV_RANDOM;
|
||||
}
|
||||
if (ctx.readOnce) {
|
||||
return POSIX_MADV_SEQUENTIAL;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
|
@ -21,6 +21,7 @@ import java.nio.file.Path;
|
|||
import java.util.Random;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import org.apache.lucene.tests.store.BaseDirectoryTestCase;
|
||||
import org.apache.lucene.util.Constants;
|
||||
|
||||
/** Tests MMapDirectory */
|
||||
// See: https://issues.apache.org/jira/browse/SOLR-12028 Tests cannot remove files on Windows
|
||||
|
@ -89,4 +90,29 @@ public class TestMMapDirectory extends BaseDirectoryTestCase {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testMadviseAvail() throws Exception {
|
||||
assertEquals(
|
||||
"madvise should be supported on Linux and Macos",
|
||||
Constants.LINUX || Constants.MAC_OS_X,
|
||||
MMapDirectory.supportsMadvise());
|
||||
}
|
||||
|
||||
// Opens the input with IOContext.RANDOM to ensure basic code path coverage for POSIX_MADV_RANDOM.
|
||||
public void testWithRandom() throws Exception {
|
||||
final int size = 8 * 1024 * 1024; // large enough to trigger madvise
|
||||
byte[] bytes = new byte[size];
|
||||
random().nextBytes(bytes);
|
||||
|
||||
try (Directory dir = new MMapDirectory(createTempDir("testWithRandom"))) {
|
||||
try (IndexOutput out = dir.createOutput("test", IOContext.DEFAULT)) {
|
||||
out.writeBytes(bytes, 0, bytes.length);
|
||||
}
|
||||
|
||||
final IndexInput in = dir.openInput("test", IOContext.RANDOM);
|
||||
final byte[] readBytes = new byte[size];
|
||||
in.readBytes(readBytes, 0, readBytes.length);
|
||||
assertArrayEquals(bytes, readBytes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue