From a6839beb87a73bff6139df44a7b9168a498dd426 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Wed, 25 May 2016 11:16:18 +0200 Subject: [PATCH] LUCENE-7300: Add HardLinkCopyDirectoryWrapper to speed up file copying if hardlinks are applicable --- lucene/CHANGES.txt | 4 + .../apache/lucene/misc/IndexMergeTool.java | 4 +- .../store/HardlinkCopyDirectoryWrapper.java | 95 +++++++++++++++++++ .../TestHardLinkCopyDirectoryWrapper.java | 76 +++++++++++++++ lucene/tools/junit4/tests.policy | 4 +- 5 files changed, 181 insertions(+), 2 deletions(-) create mode 100644 lucene/misc/src/java/org/apache/lucene/store/HardlinkCopyDirectoryWrapper.java create mode 100644 lucene/misc/src/test/org/apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index a6e3b7163ec..a29ca4479be 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -23,6 +23,10 @@ New Features e.g. clear the Gregorian Change Date. Also, toString(cal) is now identical to DateTimeFormatter.ISO_INSTANT. (David Smiley) +* LUCENE-7300: The misc module now has a directory wrapper that uses hard-links if + applicable and supported when copying files from another FSDirectory in + Directory#copyFrom. (Simon Willnauer) + API Changes * LUCENE-7163: refactor GeoRect, Polygon, and GeoUtils tests to geo diff --git a/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java b/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java index a63c16b1a11..cbb11cdd749 100644 --- a/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java +++ b/lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java @@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.HardlinkCopyDirectoryWrapper; import org.apache.lucene.util.SuppressForbidden; import java.io.IOException; @@ -45,7 +46,8 @@ public class IndexMergeTool { Directory[] indexes = new Directory[args.length - 1]; for (int i = 1; i < args.length; i++) { - indexes[i - 1] = FSDirectory.open(Paths.get(args[i])); + // try to use hardlinks if possible + indexes[i - 1] = new HardlinkCopyDirectoryWrapper(FSDirectory.open(Paths.get(args[i]))); } System.out.println("Merging..."); diff --git a/lucene/misc/src/java/org/apache/lucene/store/HardlinkCopyDirectoryWrapper.java b/lucene/misc/src/java/org/apache/lucene/store/HardlinkCopyDirectoryWrapper.java new file mode 100644 index 00000000000..374178ec84a --- /dev/null +++ b/lucene/misc/src/java/org/apache/lucene/store/HardlinkCopyDirectoryWrapper.java @@ -0,0 +1,95 @@ +package org.apache.lucene.store; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.file.FileAlreadyExistsException; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; +import java.security.AccessController; +import java.security.PrivilegedAction; + +/** + * This directory wrapper overrides {@link Directory#copyFrom(Directory, String, String, IOContext)} in order + * to optionally use a hard-link instead of a full byte by byte file copy if applicable. Hard-links are only used if the + * underlying filesystem supports it and if the {@link java.nio.file.LinkPermission} "hard" is granted. + * + *

NOTE: Using hard-links changes the copy semantics of + * {@link Directory#copyFrom(Directory, String, String, IOContext)}. When hard-links are used changes to the source file + * will be reflected in the target file and vice-versa. Within Lucene, files are write once and should not be modified + * after they have been written. This directory should not be used in situations where files change after they have + * been written. + *

+ */ +public final class HardlinkCopyDirectoryWrapper extends FilterDirectory { + /** + * Creates a new HardlinkCopyDirectoryWrapper delegating to the given directory + */ + public HardlinkCopyDirectoryWrapper(Directory in) { + super(in); + } + + @Override + public void copyFrom(Directory from, String srcFile, String destFile, IOContext context) throws IOException { + final Directory fromUnwrapped = FilterDirectory.unwrap(from); + final Directory toUnwrapped = FilterDirectory.unwrap(this); + // try to unwrap to FSDirectory - we might be able to just create hard-links of these files and save copying + // the entire file. + Exception suppressedException = null; + boolean tryCopy = true; + if (fromUnwrapped instanceof FSDirectory + && toUnwrapped instanceof FSDirectory) { + final Path fromPath = ((FSDirectory) fromUnwrapped).getDirectory(); + final Path toPath = ((FSDirectory) toUnwrapped).getDirectory(); + + if (Files.isReadable(fromPath.resolve(srcFile)) && Files.isWritable(toPath)) { + // only try hardlinks if we have permission to access the files + // if not super.copyFrom() will give us the right exceptions + suppressedException = AccessController.doPrivileged((PrivilegedAction) () -> { + try { + Files.createLink(toPath.resolve(destFile), fromPath.resolve(srcFile)); + } catch (FileNotFoundException | NoSuchFileException | FileAlreadyExistsException ex) { + return ex; // in these cases we bubble up since it's a true error condition. + } catch (IOException + | UnsupportedOperationException // if the FS doesn't support hard-links + | SecurityException ex // we don't have permission to use hard-links just fall back to byte copy + ) { + // hard-links are not supported or the files are on different filesystems + // we could go deeper and check if their filesstores are the same and opt + // out earlier but for now we just fall back to normal file-copy + return ex; + } + return null; + }); + tryCopy = suppressedException != null; + } + } + if (tryCopy) { + try { + super.copyFrom(from, srcFile, destFile, context); + } catch (Exception ex) { + if (suppressedException != null) { + ex.addSuppressed(suppressedException); + } + throw ex; + } + } + } +} diff --git a/lucene/misc/src/test/org/apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java b/lucene/misc/src/test/org/apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java new file mode 100644 index 00000000000..b084c925490 --- /dev/null +++ b/lucene/misc/src/test/org/apache/lucene/store/TestHardLinkCopyDirectoryWrapper.java @@ -0,0 +1,76 @@ +package org.apache.lucene.store; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.attribute.BasicFileAttributes; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.util.IOUtils; + +public class TestHardLinkCopyDirectoryWrapper extends BaseDirectoryTestCase { + + @Override + protected Directory getDirectory(Path file) throws IOException { + Directory open = random().nextBoolean() ? newFSDirectory(file) : newDirectory(); + return new HardlinkCopyDirectoryWrapper(open); + } + + /** + * Tests that we use hardlinks if possible on Directory#copyFrom + */ + public void testCopyHardLinks() throws IOException { + Path tempDir = createTempDir(); + Path dir_1 = tempDir.resolve("dir_1"); + Path dir_2 = tempDir.resolve("dir_2"); + Files.createDirectories(dir_1); + Files.createDirectories(dir_2); + + Directory luceneDir_1 = newFSDirectory(dir_1); + Directory luceneDir_2 = newFSDirectory(dir_2); + try(IndexOutput output = luceneDir_1.createOutput("foo.bar", IOContext.DEFAULT)) { + CodecUtil.writeHeader(output, "foo", 0); + output.writeString("hey man, nice shot!"); + CodecUtil.writeFooter(output); + } + try { + Files.createLink(tempDir.resolve("test"), dir_1.resolve("foo.bar")); + BasicFileAttributes destAttr = Files.readAttributes(tempDir.resolve("test"), BasicFileAttributes.class); + BasicFileAttributes sourceAttr = Files.readAttributes(dir_1.resolve("foo.bar"), BasicFileAttributes.class); + assumeTrue("hardlinks are not supported", destAttr.fileKey() != null + && destAttr.fileKey().equals(sourceAttr.fileKey())); + } catch (UnsupportedOperationException ex) { + assumeFalse("hardlinks are not supported", false); + } + + HardlinkCopyDirectoryWrapper wrapper = new HardlinkCopyDirectoryWrapper(luceneDir_2); + wrapper.copyFrom(luceneDir_1, "foo.bar", "bar.foo", IOContext.DEFAULT); + assertTrue(Files.exists(dir_2.resolve("bar.foo"))); + BasicFileAttributes destAttr = Files.readAttributes(dir_2.resolve("bar.foo"), BasicFileAttributes.class); + BasicFileAttributes sourceAttr = Files.readAttributes(dir_1.resolve("foo.bar"), BasicFileAttributes.class); + assertEquals(destAttr.fileKey(), sourceAttr.fileKey()); + try(ChecksumIndexInput indexInput = wrapper.openChecksumInput("bar.foo", IOContext.DEFAULT)) { + CodecUtil.checkHeader(indexInput, "foo", 0, 0); + assertEquals("hey man, nice shot!", indexInput.readString()); + CodecUtil.checkFooter(indexInput); + } + IOUtils.close(luceneDir_1, luceneDir_2); + } +} diff --git a/lucene/tools/junit4/tests.policy b/lucene/tools/junit4/tests.policy index a579fe251d3..f1d8f106dc2 100644 --- a/lucene/tools/junit4/tests.policy +++ b/lucene/tools/junit4/tests.policy @@ -38,7 +38,9 @@ grant { permission java.io.FilePermission "${junit4.childvm.cwd}${/}jacoco.db", "write"; permission java.io.FilePermission "${junit4.tempDir}${/}*", "read,write,delete"; permission java.io.FilePermission "${clover.db.dir}${/}-", "read,write,delete"; - + + // misc HardlinkCopyDirectoryWrapper needs this to test if hardlinks can be created + permission java.nio.file.LinkPermission "hard"; // needed by SSD detection tests in TestIOUtils (creates symlinks) permission java.nio.file.LinkPermission "symbolic";