diff --git a/gradle/testing/defaults-tests.gradle b/gradle/testing/defaults-tests.gradle index dc42a2216b4..160b77eaea9 100644 --- a/gradle/testing/defaults-tests.gradle +++ b/gradle/testing/defaults-tests.gradle @@ -132,6 +132,8 @@ allprojects { if (rootProject.vectorIncubatorJavaVersions.contains(rootProject.runtimeJavaVersion)) { jvmArgs '--add-modules', 'jdk.incubator.vector' } + + jvmArgs '--enable-native-access=' + (project.path == ':lucene:core' ? 'ALL-UNNAMED' : 'org.apache.lucene.core') def loggingConfigFile = layout.projectDirectory.file("${resources}/logging.properties") def tempDir = layout.projectDirectory.dir(testsTmpDir.toString()) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 1249188f4b0..d663af04fe9 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -197,6 +197,11 @@ New Features * GITHUB#12915: Add new token filters for Japanese sutegana (捨て仮名). This introduces JapaneseHiraganaUppercaseFilter and JapaneseKatakanaUppercaseFilter. (Dai Sugimori) +* GITHUB#13196: Add support for posix_madvise to MMapDirectory: If running on Linux/macOS and Java 21 + or later, MMapDirectory uses IOContext to pass suitable MADV flags to kernel of operating system. + This may improve paging logic especially when large segments are merged under memory pressure. + (Uwe Schindler, Chris Hegarty, Robert Muir, Adrien Grand) + Improvements --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/store/IOContext.java b/lucene/core/src/java/org/apache/lucene/store/IOContext.java index 1fd45659de0..f72d37359ff 100644 --- a/lucene/core/src/java/org/apache/lucene/store/IOContext.java +++ b/lucene/core/src/java/org/apache/lucene/store/IOContext.java @@ -46,24 +46,32 @@ public class IOContext { /** This flag indicates that the file will be opened, then fully read sequentially then closed. */ public final boolean readOnce; + /** + * This flag indicates that the file will be accessed randomly. If this flag is set, then readOnce + * will be false. 
+ */ + public final boolean randomAccess; + /** * This flag is used for files that are a small fraction of the total index size and are expected * to be heavily accessed in random-access fashion. Some {@link Directory} implementations may * choose to load such files into physical memory (e.g. Java heap) as a way to provide stronger - * guarantees on query latency. + * guarantees on query latency. If this flag is set, then {@link #randomAccess} will be true. */ public final boolean load; public static final IOContext DEFAULT = new IOContext(Context.DEFAULT); - public static final IOContext READONCE = new IOContext(true, false); + public static final IOContext READONCE = new IOContext(true, false, false); - public static final IOContext READ = new IOContext(false, false); + public static final IOContext READ = new IOContext(false, false, false); - public static final IOContext LOAD = new IOContext(false, true); + public static final IOContext LOAD = new IOContext(false, true, true); + + public static final IOContext RANDOM = new IOContext(false, false, true); public IOContext() { - this(false, false); + this(false, false, false); } public IOContext(FlushInfo flushInfo) { @@ -72,6 +80,7 @@ public class IOContext { this.mergeInfo = null; this.readOnce = false; this.load = false; + this.randomAccess = false; this.flushInfo = flushInfo; } @@ -79,11 +88,18 @@ public class IOContext { this(context, null); } - private IOContext(boolean readOnce, boolean load) { + private IOContext(boolean readOnce, boolean load, boolean randomAccess) { + if (readOnce && randomAccess) { + throw new IllegalArgumentException("cannot be both readOnce and randomAccess"); + } + if (load && randomAccess == false) { + throw new IllegalArgumentException("cannot be load but not randomAccess"); + } this.context = Context.READ; this.mergeInfo = null; this.readOnce = readOnce; this.load = load; + this.randomAccess = randomAccess; this.flushInfo = null; } @@ -98,6 +114,7 @@ public class IOContext { 
this.context = context; this.readOnce = false; this.load = false; + this.randomAccess = false; this.mergeInfo = mergeInfo; this.flushInfo = null; } @@ -115,12 +132,13 @@ public class IOContext { this.mergeInfo = ctxt.mergeInfo; this.flushInfo = ctxt.flushInfo; this.readOnce = readOnce; + this.randomAccess = ctxt.randomAccess && readOnce == false; // readOnce excludes randomAccess this.load = false; } @Override public int hashCode() { - return Objects.hash(context, flushInfo, mergeInfo, readOnce, load); + return Objects.hash(context, flushInfo, mergeInfo, readOnce, load, randomAccess); } @Override @@ -134,6 +152,7 @@ public class IOContext { if (!Objects.equals(mergeInfo, other.mergeInfo)) return false; if (readOnce != other.readOnce) return false; if (load != other.load) return false; + if (randomAccess != other.randomAccess) return false; return true; } @@ -147,6 +166,10 @@ public class IOContext { + flushInfo + ", readOnce=" + readOnce + + ", load=" + + load + + ", randomAccess=" + + randomAccess + "]"; } } diff --git a/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java b/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java index 5638f7c33d0..949f0ef410d 100644 --- a/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java +++ b/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java @@ -48,6 +48,13 @@ import org.apache.lucene.util.Constants; * of box with some compilation tricks. For more information about the foreign memory API read * documentation of the {@link java.lang.foreign} package. * + *

<p>On some platforms like Linux and macOS, this class will invoke the syscall {@code madvise()} + * to advise how the OS kernel should handle paging after opening a file. For this to work, Java + * code must be able to call native code. If this is not allowed, a warning is logged. To enable + * native access for Lucene in a modularized application, pass {@code + * --enable-native-access=org.apache.lucene.core} to the Java command line. If Lucene is running in + * a classpath-based application, use {@code --enable-native-access=ALL-UNNAMED}. + * *

NOTE: Accessing this class either directly or indirectly from a thread while it's * interrupted can close the underlying channel immediately if at the same time the thread is * blocked on IO. The channel will remain closed and subsequent access to {@link MMapDirectory} will @@ -204,6 +211,8 @@ public class MMapDirectory extends FSDirectory { long getDefaultMaxChunkSize(); + boolean supportsMadvise(); + default IOException convertMapFailedIOException( IOException ioe, String resourceDescription, long bufSize) { final String originalMessage; @@ -269,6 +278,14 @@ public class MMapDirectory extends FSDirectory { } } + /** + * Returns true, if MMapDirectory uses the platform's {@code madvise()} syscall to advise how OS + * kernel should handle paging after opening a file. + */ + public static boolean supportsMadvise() { + return PROVIDER.supportsMadvise(); + } + static { PROVIDER = lookupProvider(); DEFAULT_MAX_CHUNK_SIZE = PROVIDER.getDefaultMaxChunkSize(); diff --git a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInputProvider.java b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInputProvider.java index 7ccd89ff2bb..3a0787f4c0a 100644 --- a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInputProvider.java +++ b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInputProvider.java @@ -23,12 +23,19 @@ import java.nio.channels.FileChannel; import java.nio.channels.FileChannel.MapMode; import java.nio.file.Path; import java.nio.file.StandardOpenOption; +import java.util.Optional; import org.apache.lucene.util.Constants; import org.apache.lucene.util.Unwrappable; @SuppressWarnings("preview") final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexInputProvider { + private final Optional<NativeAccess> nativeAccess; + + MemorySegmentIndexInputProvider() { + this.nativeAccess = NativeAccess.getImplementation(); + } + @Override public IndexInput openInput(Path path, IOContext context, int 
chunkSizePower, boolean preload) throws IOException { @@ -45,7 +52,7 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn MemorySegmentIndexInput.newInstance( resourceDescription, arena, - map(arena, resourceDescription, fc, chunkSizePower, preload, fileSize), + map(arena, resourceDescription, fc, context, chunkSizePower, preload, fileSize), fileSize, chunkSizePower); success = true; @@ -62,10 +69,16 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn return Constants.JRE_IS_64BIT ? (1L << 34) : (1L << 28); } + @Override + public boolean supportsMadvise() { + return nativeAccess.isPresent(); + } + private final MemorySegment[] map( Arena arena, String resourceDescription, FileChannel fc, + IOContext context, int chunkSizePower, boolean preload, long length) @@ -90,8 +103,12 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn } catch (IOException ioe) { throw convertMapFailedIOException(ioe, resourceDescription, segSize); } + // if preload apply it without madvise. + // if the chunk size is smaller than 2 MiB, disable madvise support (incorrect alignment) if (preload) { segment.load(); + } else if (nativeAccess.isPresent() && chunkSizePower >= 21) { + nativeAccess.get().madvise(segment, context); } segments[segNr] = segment; startOffset += segSize; diff --git a/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java b/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java new file mode 100644 index 00000000000..30c37901e5c --- /dev/null +++ b/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.store; + +import java.io.IOException; +import java.lang.foreign.MemorySegment; +import java.util.Optional; +import org.apache.lucene.util.Constants; + +@SuppressWarnings("preview") +abstract class NativeAccess { + + /** Invoke the {@code madvise} call for the given {@link MemorySegment}. */ + public abstract void madvise(MemorySegment segment, IOContext context) throws IOException; + + /** + * Return the NativeAccess instance for this platform, if any (currently Linux and macOS only). + */ + public static Optional<NativeAccess> getImplementation() { + if (Constants.LINUX || Constants.MAC_OS_X) { + return PosixNativeAccess.getInstance(); + } + return Optional.empty(); + } +} diff --git a/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java b/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java new file mode 100644 index 00000000000..f34aa1e2164 --- /dev/null +++ b/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.store; + +import java.io.IOException; +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.Linker; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SymbolLookup; +import java.lang.foreign.ValueLayout; +import java.lang.invoke.MethodHandle; +import java.util.Locale; +import java.util.Optional; +import java.util.logging.Logger; +import org.apache.lucene.store.IOContext.Context; + +@SuppressWarnings("preview") +final class PosixNativeAccess extends NativeAccess { + + private static final Logger LOG = Logger.getLogger(PosixNativeAccess.class.getName()); + + // these constants were extracted from glibc and macos header files - luckily they are the same: + + /** No further special treatment. */ + public static final int POSIX_MADV_NORMAL = 0; + + /** Expect random page references. */ + public static final int POSIX_MADV_RANDOM = 1; + + /** Expect sequential page references. */ + public static final int POSIX_MADV_SEQUENTIAL = 2; + + /** Will need these pages. */ + public static final int POSIX_MADV_WILLNEED = 3; + + /** Don't need these pages. 
*/ + public static final int POSIX_MADV_DONTNEED = 4; + + private static final MethodHandle MH$posix_madvise; + + private static final Optional<NativeAccess> INSTANCE; + + private PosixNativeAccess() {} + + static Optional<NativeAccess> getInstance() { + return INSTANCE; + } + + static { + MethodHandle adviseHandle = null; + PosixNativeAccess instance = null; + try { + adviseHandle = lookupMadvise(); + instance = new PosixNativeAccess(); + } catch (UnsupportedOperationException uoe) { + LOG.warning(uoe.getMessage()); + } catch ( + @SuppressWarnings("unused") + IllegalCallerException ice) { + LOG.warning( + String.format( + Locale.ENGLISH, + "Lucene has no access to native functions. To enable access to native functions, " + + "pass the following on command line: --enable-native-access=%s", + Optional.ofNullable(PosixNativeAccess.class.getModule().getName()) + .orElse("ALL-UNNAMED"))); + } + MH$posix_madvise = adviseHandle; + INSTANCE = Optional.ofNullable(instance); + } + + private static MethodHandle lookupMadvise() { + final Linker linker = Linker.nativeLinker(); + final SymbolLookup stdlib = linker.defaultLookup(); + final MethodHandle mh = + findFunction( + linker, + stdlib, + "posix_madvise", + FunctionDescriptor.of( + ValueLayout.JAVA_INT, + ValueLayout.ADDRESS, + ValueLayout.JAVA_LONG, + ValueLayout.JAVA_INT)); + LOG.info("posix_madvise() available on this platform"); + return mh; + } + + private static MethodHandle findFunction( + Linker linker, SymbolLookup lookup, String name, FunctionDescriptor desc) { + final MemorySegment symbol = + lookup + .find(name) + .orElseThrow( + () -> + new UnsupportedOperationException( + "Platform has no symbol for '" + name + "' in libc.")); + return linker.downcallHandle(symbol, desc); + } + + @Override + public void madvise(MemorySegment segment, IOContext context) throws IOException { + // Note: madvise is bypassed if the segment should be preloaded via MemorySegment#load. 
+ if (segment.byteSize() == 0L) { + return; // empty segments should be excluded, because they may have no address at all + } + final Integer advice = mapIOContext(context); + if (advice == null) { + return; // do nothing + } + final int ret; + try { + ret = (int) MH$posix_madvise.invokeExact(segment, segment.byteSize(), advice.intValue()); + } catch (Throwable th) { + throw new AssertionError(th); + } + if (ret != 0) { + throw new IOException( + String.format( + Locale.ENGLISH, + "Call to posix_madvise with address=0x%08X and byteSize=%d failed with return code %d.", + segment.address(), + segment.byteSize(), + ret)); + } + } + + private Integer mapIOContext(IOContext ctx) { + // Merging always wins and implies sequential access, because kernel is advised to free pages + // after use: + if (ctx.context == Context.MERGE) { + return POSIX_MADV_SEQUENTIAL; + } + if (ctx.randomAccess) { + return POSIX_MADV_RANDOM; + } + if (ctx.readOnce) { + return POSIX_MADV_SEQUENTIAL; + } + return null; + } +} diff --git a/lucene/core/src/test/org/apache/lucene/store/TestMMapDirectory.java b/lucene/core/src/test/org/apache/lucene/store/TestMMapDirectory.java index 611dd6b1c82..edc5d369067 100644 --- a/lucene/core/src/test/org/apache/lucene/store/TestMMapDirectory.java +++ b/lucene/core/src/test/org/apache/lucene/store/TestMMapDirectory.java @@ -21,6 +21,7 @@ import java.nio.file.Path; import java.util.Random; import java.util.concurrent.CountDownLatch; import org.apache.lucene.tests.store.BaseDirectoryTestCase; +import org.apache.lucene.util.Constants; /** Tests MMapDirectory */ // See: https://issues.apache.org/jira/browse/SOLR-12028 Tests cannot remove files on Windows @@ -89,4 +90,29 @@ public class TestMMapDirectory extends BaseDirectoryTestCase { } } } + + public void testMadviseAvail() throws Exception { + assertEquals( + "madvise should be supported on Linux and Macos", + Constants.LINUX || Constants.MAC_OS_X, + MMapDirectory.supportsMadvise()); + } + + // Opens the input 
with IOContext.RANDOM to ensure basic code path coverage for POSIX_MADV_RANDOM. + public void testWithRandom() throws Exception { + final int size = 8 * 1024 * 1024; // large enough to trigger madvise + byte[] bytes = new byte[size]; + random().nextBytes(bytes); + + try (Directory dir = new MMapDirectory(createTempDir("testWithRandom"))) { + try (IndexOutput out = dir.createOutput("test", IOContext.DEFAULT)) { + out.writeBytes(bytes, 0, bytes.length); + } + try (IndexInput in = dir.openInput("test", IOContext.RANDOM)) { + final byte[] readBytes = new byte[size]; + in.readBytes(readBytes, 0, readBytes.length); + assertArrayEquals(bytes, readBytes); + } + } + } }