diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 2c3efe11f09..b85ddf0bada 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -83,6 +83,9 @@ Bug Fixes contents to ensure we don't expose pending deletes if both directory point to the same underlying filesystem directory. (Simon Willnauer) +* LUCENE-8853: FileSwitchDirectory now applies best effort to place tmp files in the same + direcotry as the target files. (Simon Willnauer) + Improvements * LUCENE-7840: Non-scoring BooleanQuery now removes SHOULD clauses before building the scorer supplier diff --git a/lucene/core/src/java/org/apache/lucene/store/Directory.java b/lucene/core/src/java/org/apache/lucene/store/Directory.java index 13e8e904b2e..5763d2c4eac 100644 --- a/lucene/core/src/java/org/apache/lucene/store/Directory.java +++ b/lucene/core/src/java/org/apache/lucene/store/Directory.java @@ -24,6 +24,7 @@ import java.nio.file.NoSuchFileException; import java.util.Collection; // for javadocs import java.util.Set; +import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.util.IOUtils; /** @@ -207,4 +208,13 @@ public abstract class Directory implements Closeable { * @lucene.internal */ public abstract Set getPendingDeletions() throws IOException; + + /** + * Creates a file name for a temporary file. The name will start with + * {@code prefix}, end with {@code suffix} and have a reserved file extension {@code .tmp}. + * @see #createTempOutput(String, String, IOContext) + */ + protected static String getTempFileName(String prefix, String suffix, long counter) { + return IndexFileNames.segmentFileName(prefix, suffix + "_" + Long.toString(counter, Character.MAX_RADIX), "tmp"); + } } diff --git a/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java b/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java index 615c089edea..cbaafb2f86d 100644 --- a/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java +++ b/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java @@ -41,7 +41,6 @@ import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; -import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.util.Constants; import org.apache.lucene.util.IOUtils; @@ -261,7 +260,7 @@ public abstract class FSDirectory extends BaseDirectory { maybeDeletePendingFiles(); while (true) { try { - String name = IndexFileNames.segmentFileName(prefix, suffix + "_" + Long.toString(nextTempFileCounter.getAndIncrement(), Character.MAX_RADIX), "tmp"); + String name = getTempFileName(prefix, suffix, nextTempFileCounter.getAndIncrement()); if (pendingDeletes.contains(name)) { continue; } diff --git a/lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java b/lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java index 7484ae00f56..9552710becf 100644 --- a/lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java +++ b/lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java @@ -27,6 +27,8 @@ import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.lucene.util.IOUtils; @@ -53,8 +55,12 @@ public class FileSwitchDirectory extends Directory { private final Directory primaryDir; private final Set primaryExtensions; private boolean doClose; + private static final Pattern EXT_PATTERN = Pattern.compile("\\.([a-zA-Z]+)"); public FileSwitchDirectory(Set primaryExtensions, Directory primaryDir, Directory secondaryDir, boolean doClose) { + if (primaryExtensions.contains("tmp")) { + throw new IllegalArgumentException("tmp is a reserved extension"); + } this.primaryExtensions = primaryExtensions; this.primaryDir = primaryDir; this.secondaryDir = secondaryDir; @@ -141,7 +147,14 @@ public class FileSwitchDirectory extends Directory { if (i == -1) { return ""; } - return name.substring(i+1, name.length()); + String ext = name.substring(i + 1); + if (ext.equals("tmp")) { + Matcher matcher = EXT_PATTERN.matcher(name.substring(0, i + 1)); + if (matcher.find()) { + return matcher.group(1); + } + } + return ext; } private Directory getDirectory(String name) { @@ -174,7 +187,12 @@ public class FileSwitchDirectory extends Directory { @Override public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException { - return getDirectory("."+suffix).createTempOutput(prefix, suffix, context); + // this is best effort - it's ok to create a tmp file with any prefix and suffix. Yet if this file is then + // in-turn used to rename they must match to the same directory hence we use the full file-name to find + // the right directory. Here we can't make a decision but we need to ensure that all other operations + // map to the right directory. + String tmpFileName = getTempFileName(prefix, suffix, 0); + return getDirectory(tmpFileName).createTempOutput(prefix, suffix, context); } @Override @@ -183,10 +201,11 @@ public class FileSwitchDirectory extends Directory { List secondaryNames = new ArrayList<>(); for (String name : names) - if (primaryExtensions.contains(getExtension(name))) + if (primaryExtensions.contains(getExtension(name))) { primaryNames.add(name); - else + } else { secondaryNames.add(name); + } primaryDir.sync(primaryNames); secondaryDir.sync(secondaryNames); diff --git a/lucene/core/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java b/lucene/core/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java index 256ee1b871b..0a12eedc614 100644 --- a/lucene/core/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java +++ b/lucene/core/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java @@ -19,6 +19,7 @@ package org.apache.lucene.store; import java.io.IOException; import java.net.URI; +import java.nio.file.AtomicMoveNotSupportedException; import java.nio.file.FileSystem; import java.nio.file.Path; import java.util.Arrays; @@ -114,6 +115,32 @@ public class TestFileSwitchDirectory extends BaseDirectoryTestCase { dir.close(); } + public void testRenameTmpFile() throws IOException { + try (Directory directory = getDirectory(createTempDir())) { + final String name; + try (IndexOutput out = directory.createTempOutput("foo.cfs", "", IOContext.DEFAULT)) { + out.writeInt(1); + name = out.getName(); + } + assertEquals(1, Arrays.stream(directory.listAll()).filter(f -> f.equals(name)).count()); + assertEquals(0, Arrays.stream(directory.listAll()).filter(f -> f.equals("foo.cfs")).count()); + directory.rename(name, "foo.cfs"); + assertEquals(1, Arrays.stream(directory.listAll()).filter(f -> f.equals("foo.cfs")).count()); + assertEquals(0, Arrays.stream(directory.listAll()).filter(f -> f.equals(name)).count()); + } + + try (Directory directory= newFSSwitchDirectory(Collections.singleton("bar"))) { + String brokenName; + try (IndexOutput out = directory.createTempOutput("foo", "bar", IOContext.DEFAULT)) { + out.writeInt(1); + brokenName = out.getName(); + } + AtomicMoveNotSupportedException exception = expectThrows(AtomicMoveNotSupportedException.class, + () -> directory.rename(brokenName, "foo.bar")); + assertEquals("foo_bar_0.tmp -> foo.bar: source and dest are in different directories", exception.getMessage()); + } + } + @Override protected Directory getDirectory(Path path) throws IOException { Set extensions = new HashSet(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java index cd8d492ca6f..e5159a784e3 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java @@ -91,6 +91,7 @@ import org.apache.lucene.store.ByteBuffersDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.FSLockFactory; +import org.apache.lucene.store.FileSwitchDirectory; import org.apache.lucene.store.FlushInfo; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.LockFactory; @@ -1414,14 +1415,13 @@ public abstract class LuceneTestCase extends Assert { } Directory fsdir = newFSDirectoryImpl(clazz, f, lf); -// LUCENE-8853: FileSwitchDirectory is broken for tmp outputs. -// if (rarely()) { -// List fileExtensions = -// Arrays.asList("fdt", "fdx", "tim", "tip", "si", "fnm", "pos", "dii", "dim", "nvm", "nvd", "dvm", "dvd"); -// Collections.shuffle(fileExtensions, random()); -// fileExtensions = fileExtensions.subList(0, 1 + random().nextInt(fileExtensions.size())); -// fsdir = new FileSwitchDirectory(new HashSet<>(fileExtensions), fsdir, newFSDirectoryImpl(clazz, f, lf), true); -// } + if (rarely()) { + List fileExtensions = + Arrays.asList("fdt", "fdx", "tim", "tip", "si", "fnm", "pos", "dii", "dim", "nvm", "nvd", "dvm", "dvd"); + Collections.shuffle(fileExtensions, random()); + fileExtensions = fileExtensions.subList(0, 1 + random().nextInt(fileExtensions.size())); + fsdir = new FileSwitchDirectory(new HashSet<>(fileExtensions), fsdir, newFSDirectoryImpl(clazz, f, lf), true); + } BaseDirectoryWrapper wrapped = wrapDirectory(random(), fsdir, bare); return wrapped; } catch (Exception e) {