Add support for posix_madvise to Java 21 MMapDirectory (#13196)

This commit is contained in:
Uwe Schindler 2024-03-25 18:44:33 +01:00 committed by GitHub
parent f4db67fae2
commit a4055dae62
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 293 additions and 8 deletions

View File

@ -132,6 +132,8 @@ allprojects {
if (rootProject.vectorIncubatorJavaVersions.contains(rootProject.runtimeJavaVersion)) {
jvmArgs '--add-modules', 'jdk.incubator.vector'
}
jvmArgs '--enable-native-access=' + (project.path == ':lucene:core' ? 'ALL-UNNAMED' : 'org.apache.lucene.core')
def loggingConfigFile = layout.projectDirectory.file("${resources}/logging.properties")
def tempDir = layout.projectDirectory.dir(testsTmpDir.toString())

View File

@ -197,6 +197,11 @@ New Features
* GITHUB#12915: Add new token filters for Japanese sutegana (捨て仮名). This introduces JapaneseHiraganaUppercaseFilter
and JapaneseKatakanaUppercaseFilter. (Dai Sugimori)
* GITHUB#13196: Add support for posix_madvise to MMapDirectory: When running on Linux/macOS with Java 21
or later, MMapDirectory uses the IOContext to pass suitable MADV flags to the operating system kernel.
This may improve paging behavior, especially when large segments are merged under memory pressure.
(Uwe Schindler, Chris Hegarty, Robert Muir, Adrien Grand)
Improvements
---------------------

View File

@ -46,24 +46,32 @@ public class IOContext {
/** This flag indicates that the file will be opened, then fully read sequentially then closed. */
public final boolean readOnce;
/**
* This flag indicates that the file will be accessed randomly. If this flag is set, then readOnce
* will be false.
*/
public final boolean randomAccess;
/**
* This flag is used for files that are a small fraction of the total index size and are expected
* to be heavily accessed in random-access fashion. Some {@link Directory} implementations may
* choose to load such files into physical memory (e.g. Java heap) as a way to provide stronger
* guarantees on query latency.
* guarantees on query latency. If this flag is set, then {@link #randomAccess} will be true.
*/
public final boolean load;
public static final IOContext DEFAULT = new IOContext(Context.DEFAULT);
public static final IOContext READONCE = new IOContext(true, false);
public static final IOContext READONCE = new IOContext(true, false, false);
public static final IOContext READ = new IOContext(false, false);
public static final IOContext READ = new IOContext(false, false, false);
public static final IOContext LOAD = new IOContext(false, true);
public static final IOContext LOAD = new IOContext(false, true, true);
public static final IOContext RANDOM = new IOContext(false, false, true);
public IOContext() {
this(false, false);
this(false, false, false);
}
public IOContext(FlushInfo flushInfo) {
@ -72,6 +80,7 @@ public class IOContext {
this.mergeInfo = null;
this.readOnce = false;
this.load = false;
this.randomAccess = false;
this.flushInfo = flushInfo;
}
@ -79,11 +88,18 @@ public class IOContext {
this(context, null);
}
private IOContext(boolean readOnce, boolean load) {
private IOContext(boolean readOnce, boolean load, boolean randomAccess) {
if (readOnce && randomAccess) {
throw new IllegalArgumentException("cannot be both readOnce and randomAccess");
}
if (load && randomAccess == false) {
throw new IllegalArgumentException("cannot be load but not randomAccess");
}
this.context = Context.READ;
this.mergeInfo = null;
this.readOnce = readOnce;
this.load = load;
this.randomAccess = randomAccess;
this.flushInfo = null;
}
@ -98,6 +114,7 @@ public class IOContext {
this.context = context;
this.readOnce = false;
this.load = false;
this.randomAccess = false;
this.mergeInfo = mergeInfo;
this.flushInfo = null;
}
@ -115,12 +132,13 @@ public class IOContext {
this.mergeInfo = ctxt.mergeInfo;
this.flushInfo = ctxt.flushInfo;
this.readOnce = readOnce;
this.randomAccess = ctxt.randomAccess;
this.load = false;
}
@Override
public int hashCode() {
return Objects.hash(context, flushInfo, mergeInfo, readOnce, load);
return Objects.hash(context, flushInfo, mergeInfo, readOnce, load, randomAccess);
}
@Override
@ -134,6 +152,7 @@ public class IOContext {
if (!Objects.equals(mergeInfo, other.mergeInfo)) return false;
if (readOnce != other.readOnce) return false;
if (load != other.load) return false;
if (randomAccess != other.randomAccess) return false;
return true;
}
@ -147,6 +166,10 @@ public class IOContext {
+ flushInfo
+ ", readOnce="
+ readOnce
+ ", load="
+ load
+ ", randomAccess="
+ randomAccess
+ "]";
}
}

View File

@ -48,6 +48,13 @@ import org.apache.lucene.util.Constants;
* of box with some compilation tricks. For more information about the foreign memory API read
* documentation of the {@link java.lang.foreign} package.
*
* <p>On some platforms like Linux and macOS, this class will invoke the syscall {@code madvise()}
* to advise how the OS kernel should handle paging after opening a file. For this to work, Java
* code must be able to call native code. If this is not allowed, a warning is logged. To enable
* native access for Lucene in a modularized application, pass {@code
* --enable-native-access=org.apache.lucene.core} to the Java command line. If Lucene is running in
* a classpath-based application, use {@code --enable-native-access=ALL-UNNAMED}.
*
* <p><b>NOTE:</b> Accessing this class either directly or indirectly from a thread while it's
* interrupted can close the underlying channel immediately if at the same time the thread is
* blocked on IO. The channel will remain closed and subsequent access to {@link MMapDirectory} will
@ -204,6 +211,8 @@ public class MMapDirectory extends FSDirectory {
long getDefaultMaxChunkSize();
/** Returns true if this provider is able to pass {@code madvise()} hints for mapped files. */
boolean supportsMadvise();
default IOException convertMapFailedIOException(
IOException ioe, String resourceDescription, long bufSize) {
final String originalMessage;
@ -269,6 +278,14 @@ public class MMapDirectory extends FSDirectory {
}
}
/**
* Returns true, if MMapDirectory uses the platform's {@code madvise()} syscall to advise how OS
* kernel should handle paging after opening a file.
*
* <p>The answer is delegated to the provider stored in {@code PROVIDER}.
*
* @return whether the active provider reports {@code madvise()} support
*/
public static boolean supportsMadvise() {
return PROVIDER.supportsMadvise();
}
static {
PROVIDER = lookupProvider();
DEFAULT_MAX_CHUNK_SIZE = PROVIDER.getDefaultMaxChunkSize();

View File

@ -23,12 +23,19 @@ import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Optional;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.Unwrappable;
@SuppressWarnings("preview")
final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexInputProvider {
/** Native access used to issue madvise calls; empty when no implementation is available. */
private final Optional<NativeAccess> nativeAccess;
/** Creates the provider and resolves the native access implementation once, up front. */
MemorySegmentIndexInputProvider() {
this.nativeAccess = NativeAccess.getImplementation();
}
@Override
public IndexInput openInput(Path path, IOContext context, int chunkSizePower, boolean preload)
throws IOException {
@ -45,7 +52,7 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn
MemorySegmentIndexInput.newInstance(
resourceDescription,
arena,
map(arena, resourceDescription, fc, chunkSizePower, preload, fileSize),
map(arena, resourceDescription, fc, context, chunkSizePower, preload, fileSize),
fileSize,
chunkSizePower);
success = true;
@ -62,10 +69,16 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn
return Constants.JRE_IS_64BIT ? (1L << 34) : (1L << 28);
}
/** Reports madvise support, which is present exactly when a native access instance was found. */
@Override
public boolean supportsMadvise() {
return nativeAccess.isPresent();
}
private final MemorySegment[] map(
Arena arena,
String resourceDescription,
FileChannel fc,
IOContext context,
int chunkSizePower,
boolean preload,
long length)
@ -90,8 +103,12 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn
} catch (IOException ioe) {
throw convertMapFailedIOException(ioe, resourceDescription, segSize);
}
// if preload apply it without madvise.
// if chunk size is too small (2 MiB), disable madvise support (incorrect alignment)
if (preload) {
segment.load();
} else if (nativeAccess.isPresent() && chunkSizePower >= 21) {
nativeAccess.get().madvise(segment, context);
}
segments[segNr] = segment;
startOffset += segSize;

View File

@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.store;
import java.io.IOException;
import java.lang.foreign.MemorySegment;
import java.util.Optional;
import org.apache.lucene.util.Constants;
@SuppressWarnings("preview")
abstract class NativeAccess {

  /**
   * Looks up the {@link NativeAccess} implementation for the current platform. At the moment only
   * Linux and macOS are handled; every other platform gets an empty result.
   *
   * @return whatever {@code PosixNativeAccess.getInstance()} reports on Linux and macOS, otherwise
   *     an empty {@link Optional}
   */
  public static Optional<NativeAccess> getImplementation() {
    final boolean posixPlatform = Constants.LINUX || Constants.MAC_OS_X;
    if (!posixPlatform) {
      return Optional.empty();
    }
    return PosixNativeAccess.getInstance();
  }

  /**
   * Invoke the {@code madvise} call for the given {@link MemorySegment}.
   *
   * @param segment the memory segment the advice applies to
   * @param context the I/O context handed to the implementation alongside the segment
   * @throws IOException declared so that implementations can report a failed call
   */
  public abstract void madvise(MemorySegment segment, IOContext context) throws IOException;
}

View File

@ -0,0 +1,156 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.store;
import java.io.IOException;
import java.lang.foreign.FunctionDescriptor;
import java.lang.foreign.Linker;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.SymbolLookup;
import java.lang.foreign.ValueLayout;
import java.lang.invoke.MethodHandle;
import java.util.Locale;
import java.util.Optional;
import java.util.logging.Logger;
import org.apache.lucene.store.IOContext.Context;
/**
 * {@link NativeAccess} implementation that calls {@code posix_madvise} through a downcall handle
 * obtained from the native linker's default symbol lookup.
 */
@SuppressWarnings("preview")
final class PosixNativeAccess extends NativeAccess {

  private static final Logger LOG = Logger.getLogger(PosixNativeAccess.class.getName());

  // The advice values below come from the glibc and macOS headers, which agree on the numbers:

  /** No further special treatment. */
  public static final int POSIX_MADV_NORMAL = 0;

  /** Expect random page references. */
  public static final int POSIX_MADV_RANDOM = 1;

  /** Expect sequential page references. */
  public static final int POSIX_MADV_SEQUENTIAL = 2;

  /** Will need these pages. */
  public static final int POSIX_MADV_WILLNEED = 3;

  /** Don't need these pages. */
  public static final int POSIX_MADV_DONTNEED = 4;

  /** Downcall handle for {@code posix_madvise}; remains {@code null} when the lookup failed. */
  private static final MethodHandle MH$posix_madvise;

  /** The single instance, or empty when {@code posix_madvise} could not be linked. */
  private static final Optional<NativeAccess> INSTANCE;

  static {
    MethodHandle handle = null;
    PosixNativeAccess singleton = null;
    try {
      handle = lookupMadvise();
      // only reached when the lookup succeeded, so an instance always has a usable handle
      singleton = new PosixNativeAccess();
    } catch (UnsupportedOperationException uoe) {
      LOG.warning(uoe.getMessage());
    } catch (
        @SuppressWarnings("unused")
        IllegalCallerException ice) {
      // name the module that needs the permission; a module without a name maps to ALL-UNNAMED
      final String moduleName = PosixNativeAccess.class.getModule().getName();
      final String accessTarget = (moduleName == null) ? "ALL-UNNAMED" : moduleName;
      LOG.warning(
          String.format(
              Locale.ENGLISH,
              "Lucene has no access to native functions. To enable access to native functions, "
                  + "pass the following on command line: --enable-native-access=%s",
              accessTarget));
    }
    MH$posix_madvise = handle;
    INSTANCE = Optional.ofNullable(singleton);
  }

  private PosixNativeAccess() {}

  /** Returns the instance created during class initialization, or empty if there is none. */
  static Optional<NativeAccess> getInstance() {
    return INSTANCE;
  }

  /**
   * Resolves {@code posix_madvise} as {@code int (address, long, int)} and logs that it is
   * available.
   */
  private static MethodHandle lookupMadvise() {
    final Linker nativeLinker = Linker.nativeLinker();
    final SymbolLookup defaultLookup = nativeLinker.defaultLookup();
    final FunctionDescriptor signature =
        FunctionDescriptor.of(
            ValueLayout.JAVA_INT, ValueLayout.ADDRESS, ValueLayout.JAVA_LONG, ValueLayout.JAVA_INT);
    final MethodHandle handle =
        findFunction(nativeLinker, defaultLookup, "posix_madvise", signature);
    LOG.info("posix_madvise() available on this platform");
    return handle;
  }

  /**
   * Finds the symbol {@code name} via {@code lookup} and links it with {@code desc}.
   *
   * @throws UnsupportedOperationException if the lookup has no such symbol
   */
  private static MethodHandle findFunction(
      Linker linker, SymbolLookup lookup, String name, FunctionDescriptor desc) {
    final Optional<MemorySegment> symbol = lookup.find(name);
    if (symbol.isEmpty()) {
      throw new UnsupportedOperationException("Platform has no symbol for '" + name + "' in libc.");
    }
    return linker.downcallHandle(symbol.get(), desc);
  }

  /**
   * Applies the advice derived from {@code context} to the whole {@code segment}. Nothing happens
   * for empty segments or when the context maps to no advice.
   *
   * @throws IOException if {@code posix_madvise} returns a non-zero code
   */
  @Override
  public void madvise(MemorySegment segment, IOContext context) throws IOException {
    // Note: madvise is bypassed if the segment should be preloaded via MemorySegment#load.
    final long length = segment.byteSize();
    if (length == 0L) {
      return; // skip empty segments, they may not have an address at all
    }
    final Integer advice = mapIOContext(context);
    if (advice == null) {
      return; // no advice for this context
    }
    final int adviceFlag = advice.intValue();
    final int ret;
    try {
      ret = (int) MH$posix_madvise.invokeExact(segment, length, adviceFlag);
    } catch (Throwable th) {
      throw new AssertionError(th);
    }
    if (ret == 0) {
      return;
    }
    throw new IOException(
        String.format(
            Locale.ENGLISH,
            "Call to posix_madvise with address=0x%08X and byteSize=%d failed with return code %d.",
            segment.address(),
            length,
            ret));
  }

  /** Translates an {@link IOContext} to a POSIX advice value, or {@code null} for "no advice". */
  private Integer mapIOContext(IOContext ctx) {
    // A merge context takes precedence over the other flags and is treated as sequential access,
    // because the kernel is then advised to free pages after use:
    if (ctx.context == Context.MERGE) {
      return POSIX_MADV_SEQUENTIAL;
    }
    if (ctx.randomAccess) {
      return POSIX_MADV_RANDOM;
    }
    return ctx.readOnce ? Integer.valueOf(POSIX_MADV_SEQUENTIAL) : null;
  }
}

View File

@ -21,6 +21,7 @@ import java.nio.file.Path;
import java.util.Random;
import java.util.concurrent.CountDownLatch;
import org.apache.lucene.tests.store.BaseDirectoryTestCase;
import org.apache.lucene.util.Constants;
/** Tests MMapDirectory */
// See: https://issues.apache.org/jira/browse/SOLR-12028 Tests cannot remove files on Windows
@ -89,4 +90,29 @@ public class TestMMapDirectory extends BaseDirectoryTestCase {
}
}
}
/** Checks that madvise support is reported on Linux and macOS, and only there. */
public void testMadviseAvail() throws Exception {
  final boolean expectedSupport = Constants.LINUX || Constants.MAC_OS_X;
  final boolean reportedSupport = MMapDirectory.supportsMadvise();
  assertEquals(
      "madvise should be supported on Linux and Macos", expectedSupport, reportedSupport);
}
// Opens the input with IOContext.RANDOM to ensure basic code path coverage for POSIX_MADV_RANDOM.
// Writes random bytes to a file, reads them back through the RANDOM input and compares them.
public void testWithRandom() throws Exception {
  final int size = 8 * 1024 * 1024; // large enough to trigger madvise
  byte[] bytes = new byte[size];
  random().nextBytes(bytes);
  try (Directory dir = new MMapDirectory(createTempDir("testWithRandom"))) {
    try (IndexOutput out = dir.createOutput("test", IOContext.DEFAULT)) {
      out.writeBytes(bytes, 0, bytes.length);
    }
    // The input is closed via try-with-resources so it is released even if the assertion fails;
    // previously it was never closed at all.
    try (IndexInput in = dir.openInput("test", IOContext.RANDOM)) {
      final byte[] readBytes = new byte[size];
      in.readBytes(readBytes, 0, readBytes.length);
      assertArrayEquals(bytes, readBytes);
    }
  }
}
}