diff --git a/gradle/testing/defaults-tests.gradle b/gradle/testing/defaults-tests.gradle
index dc42a2216b4..160b77eaea9 100644
--- a/gradle/testing/defaults-tests.gradle
+++ b/gradle/testing/defaults-tests.gradle
@@ -132,6 +132,8 @@ allprojects {
if (rootProject.vectorIncubatorJavaVersions.contains(rootProject.runtimeJavaVersion)) {
jvmArgs '--add-modules', 'jdk.incubator.vector'
}
+
+ jvmArgs '--enable-native-access=' + (project.path == ':lucene:core' ? 'ALL-UNNAMED' : 'org.apache.lucene.core')
def loggingConfigFile = layout.projectDirectory.file("${resources}/logging.properties")
def tempDir = layout.projectDirectory.dir(testsTmpDir.toString())
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 1249188f4b0..d663af04fe9 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -197,6 +197,11 @@ New Features
* GITHUB#12915: Add new token filters for Japanese sutegana (捨て仮名). This introduces JapaneseHiraganaUppercaseFilter
and JapaneseKatakanaUppercaseFilter. (Dai Sugimori)
+* GITHUB#13196: Add support for posix_madvise to MMapDirectory: If running on Linux/macOS and Java 21
+  or later, MMapDirectory uses IOContext to pass suitable MADV flags to the operating system kernel.
+  This may improve paging logic, especially when large segments are merged under memory pressure.
+  (Uwe Schindler, Chris Hegarty, Robert Muir, Adrien Grand)
+
Improvements
---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/store/IOContext.java b/lucene/core/src/java/org/apache/lucene/store/IOContext.java
index 1fd45659de0..f72d37359ff 100644
--- a/lucene/core/src/java/org/apache/lucene/store/IOContext.java
+++ b/lucene/core/src/java/org/apache/lucene/store/IOContext.java
@@ -46,24 +46,32 @@ public class IOContext {
/** This flag indicates that the file will be opened, then fully read sequentially then closed. */
public final boolean readOnce;
+ /**
+ * This flag indicates that the file will be accessed randomly. If this flag is set, then readOnce
+ * will be false.
+ */
+ public final boolean randomAccess;
+
/**
* This flag is used for files that are a small fraction of the total index size and are expected
* to be heavily accessed in random-access fashion. Some {@link Directory} implementations may
* choose to load such files into physical memory (e.g. Java heap) as a way to provide stronger
- * guarantees on query latency.
+ * guarantees on query latency. If this flag is set, then {@link #randomAccess} will be true.
*/
public final boolean load;
public static final IOContext DEFAULT = new IOContext(Context.DEFAULT);
- public static final IOContext READONCE = new IOContext(true, false);
+ public static final IOContext READONCE = new IOContext(true, false, false);
- public static final IOContext READ = new IOContext(false, false);
+ public static final IOContext READ = new IOContext(false, false, false);
- public static final IOContext LOAD = new IOContext(false, true);
+ public static final IOContext LOAD = new IOContext(false, true, true);
+
+ public static final IOContext RANDOM = new IOContext(false, false, true);
public IOContext() {
- this(false, false);
+ this(false, false, false);
}
public IOContext(FlushInfo flushInfo) {
@@ -72,6 +80,7 @@ public class IOContext {
this.mergeInfo = null;
this.readOnce = false;
this.load = false;
+ this.randomAccess = false;
this.flushInfo = flushInfo;
}
@@ -79,11 +88,18 @@ public class IOContext {
this(context, null);
}
- private IOContext(boolean readOnce, boolean load) {
+ private IOContext(boolean readOnce, boolean load, boolean randomAccess) {
+ if (readOnce && randomAccess) {
+ throw new IllegalArgumentException("cannot be both readOnce and randomAccess");
+ }
+ if (load && randomAccess == false) {
+ throw new IllegalArgumentException("cannot be load but not randomAccess");
+ }
this.context = Context.READ;
this.mergeInfo = null;
this.readOnce = readOnce;
this.load = load;
+ this.randomAccess = randomAccess;
this.flushInfo = null;
}
@@ -98,6 +114,7 @@ public class IOContext {
this.context = context;
this.readOnce = false;
this.load = false;
+ this.randomAccess = false;
this.mergeInfo = mergeInfo;
this.flushInfo = null;
}
@@ -115,12 +132,13 @@ public class IOContext {
this.mergeInfo = ctxt.mergeInfo;
this.flushInfo = ctxt.flushInfo;
this.readOnce = readOnce;
+ this.randomAccess = ctxt.randomAccess;
this.load = false;
}
@Override
public int hashCode() {
- return Objects.hash(context, flushInfo, mergeInfo, readOnce, load);
+ return Objects.hash(context, flushInfo, mergeInfo, readOnce, load, randomAccess);
}
@Override
@@ -134,6 +152,7 @@ public class IOContext {
if (!Objects.equals(mergeInfo, other.mergeInfo)) return false;
if (readOnce != other.readOnce) return false;
if (load != other.load) return false;
+ if (randomAccess != other.randomAccess) return false;
return true;
}
@@ -147,6 +166,10 @@ public class IOContext {
+ flushInfo
+ ", readOnce="
+ readOnce
+ + ", load="
+ + load
+ + ", randomAccess="
+ + randomAccess
+ "]";
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java b/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
index 5638f7c33d0..949f0ef410d 100644
--- a/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
+++ b/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
@@ -48,6 +48,13 @@ import org.apache.lucene.util.Constants;
* of box with some compilation tricks. For more information about the foreign memory API read
* documentation of the {@link java.lang.foreign} package.
*
+ * <p>On some platforms like Linux and MacOS X, this class will invoke the syscall {@code madvise()}
+ * to advise how OS kernel should handle paging after opening a file. For this to work, Java code
+ * must be able to call native code. If this is not allowed, a warning is logged. To enable native
+ * access for Lucene in a modularized application, pass {@code
+ * --enable-native-access=org.apache.lucene.core} to the Java command line. If Lucene is running in
+ * a classpath-based application, use {@code --enable-native-access=ALL-UNNAMED}.
+ *
* <p><b>NOTE:</b> Accessing this class either directly or indirectly from a thread while it's
* interrupted can close the underlying channel immediately if at the same time the thread is
* blocked on IO. The channel will remain closed and subsequent access to {@link MMapDirectory} will
@@ -204,6 +211,8 @@ public class MMapDirectory extends FSDirectory {
long getDefaultMaxChunkSize();
+ boolean supportsMadvise();
+
default IOException convertMapFailedIOException(
IOException ioe, String resourceDescription, long bufSize) {
final String originalMessage;
@@ -269,6 +278,14 @@ public class MMapDirectory extends FSDirectory {
}
}
+ /**
+ * Returns {@code true} if MMapDirectory uses the platform's {@code madvise()} syscall to advise
+ * the OS kernel how to handle paging after opening a file; the answer comes from the provider.
+ */
+ public static boolean supportsMadvise() {
+ return PROVIDER.supportsMadvise();
+ }
+
static {
PROVIDER = lookupProvider();
DEFAULT_MAX_CHUNK_SIZE = PROVIDER.getDefaultMaxChunkSize();
diff --git a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInputProvider.java b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInputProvider.java
index 7ccd89ff2bb..3a0787f4c0a 100644
--- a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInputProvider.java
+++ b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInputProvider.java
@@ -23,12 +23,19 @@ import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
+import java.util.Optional;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.Unwrappable;
@SuppressWarnings("preview")
final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexInputProvider {
+  private final Optional<NativeAccess> nativeAccess; // empty when no native access is available
+
+  MemorySegmentIndexInputProvider() {
+    this.nativeAccess = NativeAccess.getImplementation();
+  }
+
@Override
public IndexInput openInput(Path path, IOContext context, int chunkSizePower, boolean preload)
throws IOException {
@@ -45,7 +52,7 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn
MemorySegmentIndexInput.newInstance(
resourceDescription,
arena,
- map(arena, resourceDescription, fc, chunkSizePower, preload, fileSize),
+ map(arena, resourceDescription, fc, context, chunkSizePower, preload, fileSize),
fileSize,
chunkSizePower);
success = true;
@@ -62,10 +69,16 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn
return Constants.JRE_IS_64BIT ? (1L << 34) : (1L << 28);
}
+ @Override
+ public boolean supportsMadvise() {
+ return nativeAccess.isPresent(); // true only if a NativeAccess implementation was obtained
+ }
+
private final MemorySegment[] map(
Arena arena,
String resourceDescription,
FileChannel fc,
+ IOContext context,
int chunkSizePower,
boolean preload,
long length)
@@ -90,8 +103,12 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn
} catch (IOException ioe) {
throw convertMapFailedIOException(ioe, resourceDescription, segSize);
}
+ // if preload apply it without madvise.
+ // if chunk size is too small (< 2 MiB), disable madvise support (incorrect alignment)
if (preload) {
segment.load();
+ } else if (nativeAccess.isPresent() && chunkSizePower >= 21) {
+ nativeAccess.get().madvise(segment, context);
}
segments[segNr] = segment;
startOffset += segSize;
diff --git a/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java b/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java
new file mode 100644
index 00000000000..30c37901e5c
--- /dev/null
+++ b/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.store;
+
+import java.io.IOException;
+import java.lang.foreign.MemorySegment;
+import java.util.Optional;
+import org.apache.lucene.util.Constants;
+
+@SuppressWarnings("preview")
+abstract class NativeAccess {
+
+  /** Invoke the {@code madvise} call for the given {@link MemorySegment}. */
+  public abstract void madvise(MemorySegment segment, IOContext context) throws IOException;
+
+  /**
+   * Return the NativeAccess for this platform (only Linux and macOS so far), else empty.
+   */
+  public static Optional<NativeAccess> getImplementation() {
+    if (Constants.LINUX || Constants.MAC_OS_X) {
+      return PosixNativeAccess.getInstance();
+    }
+    return Optional.empty(); // no native implementation for any other operating system
+  }
+}
diff --git a/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java b/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java
new file mode 100644
index 00000000000..f34aa1e2164
--- /dev/null
+++ b/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.store;
+
+import java.io.IOException;
+import java.lang.foreign.FunctionDescriptor;
+import java.lang.foreign.Linker;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.SymbolLookup;
+import java.lang.foreign.ValueLayout;
+import java.lang.invoke.MethodHandle;
+import java.util.Locale;
+import java.util.Optional;
+import java.util.logging.Logger;
+import org.apache.lucene.store.IOContext.Context;
+
+@SuppressWarnings("preview")
+final class PosixNativeAccess extends NativeAccess {
+
+  private static final Logger LOG = Logger.getLogger(PosixNativeAccess.class.getName());
+
+  // Advice constants extracted from the glibc and macOS header files - luckily they are the same:
+
+  /** No further special treatment. */
+  public static final int POSIX_MADV_NORMAL = 0;
+
+  /** Expect random page references. */
+  public static final int POSIX_MADV_RANDOM = 1;
+
+  /** Expect sequential page references. */
+  public static final int POSIX_MADV_SEQUENTIAL = 2;
+
+  /** Will need these pages. */
+  public static final int POSIX_MADV_WILLNEED = 3;
+
+  /** Don't need these pages. */
+  public static final int POSIX_MADV_DONTNEED = 4;
+
+  private static final MethodHandle MH$posix_madvise; // stays null if the lookup below failed
+
+  private static final Optional<NativeAccess> INSTANCE; // empty if the lookup below failed
+
+  private PosixNativeAccess() {}
+
+  static Optional<NativeAccess> getInstance() {
+    return INSTANCE;
+  }
+
+  static {
+    MethodHandle adviseHandle = null;
+    PosixNativeAccess instance = null;
+    try {
+      adviseHandle = lookupMadvise();
+      instance = new PosixNativeAccess(); // only reached when the handle was resolved
+    } catch (UnsupportedOperationException uoe) {
+      LOG.warning(uoe.getMessage());
+    } catch (
+        @SuppressWarnings("unused")
+        IllegalCallerException ice) {
+      LOG.warning(
+          String.format(
+              Locale.ENGLISH,
+              "Lucene has no access to native functions. To enable access to native functions, "
+                  + "pass the following on command line: --enable-native-access=%s",
+              Optional.ofNullable(PosixNativeAccess.class.getModule().getName())
+                  .orElse("ALL-UNNAMED")));
+    }
+    MH$posix_madvise = adviseHandle;
+    INSTANCE = Optional.ofNullable(instance);
+  }
+
+  private static MethodHandle lookupMadvise() {
+    final Linker linker = Linker.nativeLinker();
+    final SymbolLookup stdlib = linker.defaultLookup();
+    final MethodHandle mh =
+        findFunction(
+            linker,
+            stdlib,
+            "posix_madvise",
+            FunctionDescriptor.of(
+                ValueLayout.JAVA_INT,
+                ValueLayout.ADDRESS,
+                ValueLayout.JAVA_LONG,
+                ValueLayout.JAVA_INT));
+    LOG.info("posix_madvise() available on this platform");
+    return mh;
+  }
+
+  private static MethodHandle findFunction(
+      Linker linker, SymbolLookup lookup, String name, FunctionDescriptor desc) {
+    final MemorySegment symbol =
+        lookup
+            .find(name)
+            .orElseThrow(
+                () ->
+                    new UnsupportedOperationException(
+                        "Platform has no symbol for '" + name + "' in libc."));
+    return linker.downcallHandle(symbol, desc);
+  }
+
+  @Override
+  public void madvise(MemorySegment segment, IOContext context) throws IOException {
+    // Note: madvise is bypassed if the segment should be preloaded via MemorySegment#load.
+    if (segment.byteSize() == 0L) {
+      return; // empty segments should be excluded, because they may have no address at all
+    }
+    final Integer advice = mapIOContext(context);
+    if (advice == null) {
+      return; // no advice is mapped for this IOContext, so nothing is passed to the kernel
+    }
+    final int ret;
+    try {
+      ret = (int) MH$posix_madvise.invokeExact(segment, segment.byteSize(), advice.intValue());
+    } catch (Throwable th) {
+      throw new AssertionError(th);
+    }
+    if (ret != 0) {
+      throw new IOException(
+          String.format(
+              Locale.ENGLISH,
+              "Call to posix_madvise with address=0x%08X and byteSize=%d failed with return code %d.",
+              segment.address(),
+              segment.byteSize(),
+              ret));
+    }
+  }
+
+  private Integer mapIOContext(IOContext ctx) {
+    // Merging always wins and implies sequential access, because kernel is advised to free pages
+    // after use:
+    if (ctx.context == Context.MERGE) {
+      return POSIX_MADV_SEQUENTIAL;
+    }
+    if (ctx.randomAccess) {
+      return POSIX_MADV_RANDOM;
+    }
+    if (ctx.readOnce) {
+      return POSIX_MADV_SEQUENTIAL;
+    }
+    return null; // any other context: the caller skips the madvise call
+  }
+}
diff --git a/lucene/core/src/test/org/apache/lucene/store/TestMMapDirectory.java b/lucene/core/src/test/org/apache/lucene/store/TestMMapDirectory.java
index 611dd6b1c82..edc5d369067 100644
--- a/lucene/core/src/test/org/apache/lucene/store/TestMMapDirectory.java
+++ b/lucene/core/src/test/org/apache/lucene/store/TestMMapDirectory.java
@@ -21,6 +21,7 @@ import java.nio.file.Path;
import java.util.Random;
import java.util.concurrent.CountDownLatch;
import org.apache.lucene.tests.store.BaseDirectoryTestCase;
+import org.apache.lucene.util.Constants;
/** Tests MMapDirectory */
// See: https://issues.apache.org/jira/browse/SOLR-12028 Tests cannot remove files on Windows
@@ -89,4 +90,29 @@ public class TestMMapDirectory extends BaseDirectoryTestCase {
}
}
}
+
+  public void testMadviseAvail() throws Exception {
+    // madvise support is expected exactly on the two platforms named in the assertion message
+    final boolean expected = Constants.LINUX || Constants.MAC_OS_X;
+    final boolean actual = MMapDirectory.supportsMadvise();
+    assertEquals("madvise should be supported on Linux and Macos", expected, actual);
+  }
+
+  // Opens the input with IOContext.RANDOM to ensure basic code path coverage for POSIX_MADV_RANDOM.
+  public void testWithRandom() throws Exception {
+    final int size = 8 * 1024 * 1024; // large enough to trigger madvise
+    byte[] bytes = new byte[size];
+    random().nextBytes(bytes);
+
+    try (Directory dir = new MMapDirectory(createTempDir("testWithRandom"))) {
+      try (IndexOutput out = dir.createOutput("test", IOContext.DEFAULT)) {
+        out.writeBytes(bytes, 0, bytes.length);
+      }
+      try (IndexInput in = dir.openInput("test", IOContext.RANDOM)) { // always close the input
+        final byte[] readBytes = new byte[size];
+        in.readBytes(readBytes, 0, readBytes.length);
+        assertArrayEquals(bytes, readBytes);
+      }
+    }
+  }
}