LUCENE-7300: Add HardLinkCopyDirectoryWrapper to speed up file copying if hardlinks are applicable

This commit is contained in:
Simon Willnauer 2016-05-25 11:16:18 +02:00
parent c996c78b47
commit 268da5be45
5 changed files with 181 additions and 2 deletions

View File

@ -26,6 +26,10 @@ New Features
e.g. clear the Gregorian Change Date. Also, toString(cal) is now identical to e.g. clear the Gregorian Change Date. Also, toString(cal) is now identical to
DateTimeFormatter.ISO_INSTANT. (David Smiley) DateTimeFormatter.ISO_INSTANT. (David Smiley)
* LUCENE-7300: The misc module now has a directory wrapper that uses hard-links if
applicable and supported when copying files from another FSDirectory in
Directory#copyFrom. (Simon Willnauer)
API Changes API Changes
* LUCENE-7184: Refactor LatLonPoint encoding methods to new GeoEncodingUtils * LUCENE-7184: Refactor LatLonPoint encoding methods to new GeoEncodingUtils

View File

@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.HardlinkCopyDirectoryWrapper;
import org.apache.lucene.util.SuppressForbidden; import org.apache.lucene.util.SuppressForbidden;
import java.io.IOException; import java.io.IOException;
@ -45,7 +46,8 @@ public class IndexMergeTool {
Directory[] indexes = new Directory[args.length - 1]; Directory[] indexes = new Directory[args.length - 1];
for (int i = 1; i < args.length; i++) { for (int i = 1; i < args.length; i++) {
indexes[i - 1] = FSDirectory.open(Paths.get(args[i])); // try to use hardlinks if possible
indexes[i - 1] = new HardlinkCopyDirectoryWrapper(FSDirectory.open(Paths.get(args[i])));
} }
System.out.println("Merging..."); System.out.println("Merging...");

View File

@ -0,0 +1,95 @@
package org.apache.lucene.store;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
import java.security.AccessController;
import java.security.PrivilegedAction;
/**
* This directory wrapper overrides {@link Directory#copyFrom(Directory, String, String, IOContext)} in order
* to optionally use a hard-link instead of a full byte by byte file copy if applicable. Hard-links are only used if the
* underlying filesystem supports it and if the {@link java.nio.file.LinkPermission} "hard" is granted.
*
* <p><b>NOTE:</b> Using hard-links changes the copy semantics of
* {@link Directory#copyFrom(Directory, String, String, IOContext)}. When hard-links are used changes to the source file
* will be reflected in the target file and vice-versa. Within Lucene, files are write once and should not be modified
* after they have been written. This directory should not be used in situations where files change after they have
* been written.
* </p>
*/
public final class HardlinkCopyDirectoryWrapper extends FilterDirectory {
/**
* Creates a new HardlinkCopyDirectoryWrapper delegating to the given directory
*/
public HardlinkCopyDirectoryWrapper(Directory in) {
super(in);
}
@Override
public void copyFrom(Directory from, String srcFile, String destFile, IOContext context) throws IOException {
final Directory fromUnwrapped = FilterDirectory.unwrap(from);
final Directory toUnwrapped = FilterDirectory.unwrap(this);
// try to unwrap to FSDirectory - we might be able to just create hard-links of these files and save copying
// the entire file.
Exception suppressedException = null;
boolean tryCopy = true;
if (fromUnwrapped instanceof FSDirectory
&& toUnwrapped instanceof FSDirectory) {
final Path fromPath = ((FSDirectory) fromUnwrapped).getDirectory();
final Path toPath = ((FSDirectory) toUnwrapped).getDirectory();
if (Files.isReadable(fromPath.resolve(srcFile)) && Files.isWritable(toPath)) {
// only try hardlinks if we have permission to access the files
// if not super.copyFrom() will give us the right exceptions
suppressedException = AccessController.doPrivileged((PrivilegedAction<Exception>) () -> {
try {
Files.createLink(toPath.resolve(destFile), fromPath.resolve(srcFile));
} catch (FileNotFoundException | NoSuchFileException | FileAlreadyExistsException ex) {
return ex; // in these cases we bubble up since it's a true error condition.
} catch (IOException
| UnsupportedOperationException // if the FS doesn't support hard-links
| SecurityException ex // we don't have permission to use hard-links just fall back to byte copy
) {
// hard-links are not supported or the files are on different filesystems
// we could go deeper and check if their filesstores are the same and opt
// out earlier but for now we just fall back to normal file-copy
return ex;
}
return null;
});
tryCopy = suppressedException != null;
}
}
if (tryCopy) {
try {
super.copyFrom(from, srcFile, destFile, context);
} catch (Exception ex) {
if (suppressedException != null) {
ex.addSuppressed(suppressedException);
}
throw ex;
}
}
}
}

View File

@ -0,0 +1,76 @@
package org.apache.lucene.store;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.BasicFileAttributes;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.util.IOUtils;
public class TestHardLinkCopyDirectoryWrapper extends BaseDirectoryTestCase {
@Override
protected Directory getDirectory(Path file) throws IOException {
Directory open = random().nextBoolean() ? newFSDirectory(file) : newDirectory();
return new HardlinkCopyDirectoryWrapper(open);
}
/**
* Tests that we use hardlinks if possible on Directory#copyFrom
*/
public void testCopyHardLinks() throws IOException {
Path tempDir = createTempDir();
Path dir_1 = tempDir.resolve("dir_1");
Path dir_2 = tempDir.resolve("dir_2");
Files.createDirectories(dir_1);
Files.createDirectories(dir_2);
Directory luceneDir_1 = newFSDirectory(dir_1);
Directory luceneDir_2 = newFSDirectory(dir_2);
try(IndexOutput output = luceneDir_1.createOutput("foo.bar", IOContext.DEFAULT)) {
CodecUtil.writeHeader(output, "foo", 0);
output.writeString("hey man, nice shot!");
CodecUtil.writeFooter(output);
}
try {
Files.createLink(tempDir.resolve("test"), dir_1.resolve("foo.bar"));
BasicFileAttributes destAttr = Files.readAttributes(tempDir.resolve("test"), BasicFileAttributes.class);
BasicFileAttributes sourceAttr = Files.readAttributes(dir_1.resolve("foo.bar"), BasicFileAttributes.class);
assumeTrue("hardlinks are not supported", destAttr.fileKey() != null
&& destAttr.fileKey().equals(sourceAttr.fileKey()));
} catch (UnsupportedOperationException ex) {
assumeFalse("hardlinks are not supported", false);
}
HardlinkCopyDirectoryWrapper wrapper = new HardlinkCopyDirectoryWrapper(luceneDir_2);
wrapper.copyFrom(luceneDir_1, "foo.bar", "bar.foo", IOContext.DEFAULT);
assertTrue(Files.exists(dir_2.resolve("bar.foo")));
BasicFileAttributes destAttr = Files.readAttributes(dir_2.resolve("bar.foo"), BasicFileAttributes.class);
BasicFileAttributes sourceAttr = Files.readAttributes(dir_1.resolve("foo.bar"), BasicFileAttributes.class);
assertEquals(destAttr.fileKey(), sourceAttr.fileKey());
try(ChecksumIndexInput indexInput = wrapper.openChecksumInput("bar.foo", IOContext.DEFAULT)) {
CodecUtil.checkHeader(indexInput, "foo", 0, 0);
assertEquals("hey man, nice shot!", indexInput.readString());
CodecUtil.checkFooter(indexInput);
}
IOUtils.close(luceneDir_1, luceneDir_2);
}
}

View File

@ -38,7 +38,9 @@ grant {
permission java.io.FilePermission "${junit4.childvm.cwd}${/}jacoco.db", "write"; permission java.io.FilePermission "${junit4.childvm.cwd}${/}jacoco.db", "write";
permission java.io.FilePermission "${junit4.tempDir}${/}*", "read,write,delete"; permission java.io.FilePermission "${junit4.tempDir}${/}*", "read,write,delete";
permission java.io.FilePermission "${clover.db.dir}${/}-", "read,write,delete"; permission java.io.FilePermission "${clover.db.dir}${/}-", "read,write,delete";
// misc HardlinkCopyDirectoryWrapper needs this to test if hardlinks can be created
permission java.nio.file.LinkPermission "hard";
// needed by SSD detection tests in TestIOUtils (creates symlinks) // needed by SSD detection tests in TestIOUtils (creates symlinks)
permission java.nio.file.LinkPermission "symbolic"; permission java.nio.file.LinkPermission "symbolic";