mirror of https://github.com/apache/lucene.git
LUCENE-8853: Try parsing original file extension from tmp file (#716)
FileSwitchDirectory fails if the tmp file are not in the same directory as the file it's renamed to. This is correct behavior but breaks with tmp files used with index sorting. This change tries best effort to find the right extension directory if the file ends with `.tmp`
This commit is contained in:
parent
4f6314c59b
commit
fb6e28d9f1
|
@ -83,6 +83,9 @@ Bug Fixes
|
|||
contents to ensure we don't expose pending deletes if both directory point to the same
|
||||
underlying filesystem directory. (Simon Willnauer)
|
||||
|
||||
* LUCENE-8853: FileSwitchDirectory now applies best effort to place tmp files in the same
|
||||
direcotry as the target files. (Simon Willnauer)
|
||||
|
||||
Improvements
|
||||
|
||||
* LUCENE-7840: Non-scoring BooleanQuery now removes SHOULD clauses before building the scorer supplier
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.nio.file.NoSuchFileException;
|
|||
import java.util.Collection; // for javadocs
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
|
@ -207,4 +208,13 @@ public abstract class Directory implements Closeable {
|
|||
* @lucene.internal
|
||||
*/
|
||||
public abstract Set<String> getPendingDeletions() throws IOException;
|
||||
|
||||
/**
|
||||
* Creates a file name for a temporary file. The name will start with
|
||||
* {@code prefix}, end with {@code suffix} and have a reserved file extension {@code .tmp}.
|
||||
* @see #createTempOutput(String, String, IOContext)
|
||||
*/
|
||||
protected static String getTempFileName(String prefix, String suffix, long counter) {
|
||||
return IndexFileNames.segmentFileName(prefix, suffix + "_" + Long.toString(counter, Character.MAX_RADIX), "tmp");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -41,7 +41,6 @@ import java.util.concurrent.Future;
|
|||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
|
@ -261,7 +260,7 @@ public abstract class FSDirectory extends BaseDirectory {
|
|||
maybeDeletePendingFiles();
|
||||
while (true) {
|
||||
try {
|
||||
String name = IndexFileNames.segmentFileName(prefix, suffix + "_" + Long.toString(nextTempFileCounter.getAndIncrement(), Character.MAX_RADIX), "tmp");
|
||||
String name = getTempFileName(prefix, suffix, nextTempFileCounter.getAndIncrement());
|
||||
if (pendingDeletes.contains(name)) {
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -27,6 +27,8 @@ import java.util.Collections;
|
|||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
|
@ -53,8 +55,12 @@ public class FileSwitchDirectory extends Directory {
|
|||
private final Directory primaryDir;
|
||||
private final Set<String> primaryExtensions;
|
||||
private boolean doClose;
|
||||
private static final Pattern EXT_PATTERN = Pattern.compile("\\.([a-zA-Z]+)");
|
||||
|
||||
public FileSwitchDirectory(Set<String> primaryExtensions, Directory primaryDir, Directory secondaryDir, boolean doClose) {
|
||||
if (primaryExtensions.contains("tmp")) {
|
||||
throw new IllegalArgumentException("tmp is a reserved extension");
|
||||
}
|
||||
this.primaryExtensions = primaryExtensions;
|
||||
this.primaryDir = primaryDir;
|
||||
this.secondaryDir = secondaryDir;
|
||||
|
@ -141,7 +147,14 @@ public class FileSwitchDirectory extends Directory {
|
|||
if (i == -1) {
|
||||
return "";
|
||||
}
|
||||
return name.substring(i+1, name.length());
|
||||
String ext = name.substring(i + 1);
|
||||
if (ext.equals("tmp")) {
|
||||
Matcher matcher = EXT_PATTERN.matcher(name.substring(0, i + 1));
|
||||
if (matcher.find()) {
|
||||
return matcher.group(1);
|
||||
}
|
||||
}
|
||||
return ext;
|
||||
}
|
||||
|
||||
private Directory getDirectory(String name) {
|
||||
|
@ -174,7 +187,12 @@ public class FileSwitchDirectory extends Directory {
|
|||
|
||||
@Override
|
||||
public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException {
|
||||
return getDirectory("."+suffix).createTempOutput(prefix, suffix, context);
|
||||
// this is best effort - it's ok to create a tmp file with any prefix and suffix. Yet if this file is then
|
||||
// in-turn used to rename they must match to the same directory hence we use the full file-name to find
|
||||
// the right directory. Here we can't make a decision but we need to ensure that all other operations
|
||||
// map to the right directory.
|
||||
String tmpFileName = getTempFileName(prefix, suffix, 0);
|
||||
return getDirectory(tmpFileName).createTempOutput(prefix, suffix, context);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -183,10 +201,11 @@ public class FileSwitchDirectory extends Directory {
|
|||
List<String> secondaryNames = new ArrayList<>();
|
||||
|
||||
for (String name : names)
|
||||
if (primaryExtensions.contains(getExtension(name)))
|
||||
if (primaryExtensions.contains(getExtension(name))) {
|
||||
primaryNames.add(name);
|
||||
else
|
||||
} else {
|
||||
secondaryNames.add(name);
|
||||
}
|
||||
|
||||
primaryDir.sync(primaryNames);
|
||||
secondaryDir.sync(secondaryNames);
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.store;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.nio.file.AtomicMoveNotSupportedException;
|
||||
import java.nio.file.FileSystem;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Arrays;
|
||||
|
@ -114,6 +115,32 @@ public class TestFileSwitchDirectory extends BaseDirectoryTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
public void testRenameTmpFile() throws IOException {
|
||||
try (Directory directory = getDirectory(createTempDir())) {
|
||||
final String name;
|
||||
try (IndexOutput out = directory.createTempOutput("foo.cfs", "", IOContext.DEFAULT)) {
|
||||
out.writeInt(1);
|
||||
name = out.getName();
|
||||
}
|
||||
assertEquals(1, Arrays.stream(directory.listAll()).filter(f -> f.equals(name)).count());
|
||||
assertEquals(0, Arrays.stream(directory.listAll()).filter(f -> f.equals("foo.cfs")).count());
|
||||
directory.rename(name, "foo.cfs");
|
||||
assertEquals(1, Arrays.stream(directory.listAll()).filter(f -> f.equals("foo.cfs")).count());
|
||||
assertEquals(0, Arrays.stream(directory.listAll()).filter(f -> f.equals(name)).count());
|
||||
}
|
||||
|
||||
try (Directory directory= newFSSwitchDirectory(Collections.singleton("bar"))) {
|
||||
String brokenName;
|
||||
try (IndexOutput out = directory.createTempOutput("foo", "bar", IOContext.DEFAULT)) {
|
||||
out.writeInt(1);
|
||||
brokenName = out.getName();
|
||||
}
|
||||
AtomicMoveNotSupportedException exception = expectThrows(AtomicMoveNotSupportedException.class,
|
||||
() -> directory.rename(brokenName, "foo.bar"));
|
||||
assertEquals("foo_bar_0.tmp -> foo.bar: source and dest are in different directories", exception.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Directory getDirectory(Path path) throws IOException {
|
||||
Set<String> extensions = new HashSet<String>();
|
||||
|
|
|
@ -91,6 +91,7 @@ import org.apache.lucene.store.ByteBuffersDirectory;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.store.FSLockFactory;
|
||||
import org.apache.lucene.store.FileSwitchDirectory;
|
||||
import org.apache.lucene.store.FlushInfo;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.LockFactory;
|
||||
|
@ -1414,14 +1415,13 @@ public abstract class LuceneTestCase extends Assert {
|
|||
}
|
||||
|
||||
Directory fsdir = newFSDirectoryImpl(clazz, f, lf);
|
||||
// LUCENE-8853: FileSwitchDirectory is broken for tmp outputs.
|
||||
// if (rarely()) {
|
||||
// List<String> fileExtensions =
|
||||
// Arrays.asList("fdt", "fdx", "tim", "tip", "si", "fnm", "pos", "dii", "dim", "nvm", "nvd", "dvm", "dvd");
|
||||
// Collections.shuffle(fileExtensions, random());
|
||||
// fileExtensions = fileExtensions.subList(0, 1 + random().nextInt(fileExtensions.size()));
|
||||
// fsdir = new FileSwitchDirectory(new HashSet<>(fileExtensions), fsdir, newFSDirectoryImpl(clazz, f, lf), true);
|
||||
// }
|
||||
if (rarely()) {
|
||||
List<String> fileExtensions =
|
||||
Arrays.asList("fdt", "fdx", "tim", "tip", "si", "fnm", "pos", "dii", "dim", "nvm", "nvd", "dvm", "dvd");
|
||||
Collections.shuffle(fileExtensions, random());
|
||||
fileExtensions = fileExtensions.subList(0, 1 + random().nextInt(fileExtensions.size()));
|
||||
fsdir = new FileSwitchDirectory(new HashSet<>(fileExtensions), fsdir, newFSDirectoryImpl(clazz, f, lf), true);
|
||||
}
|
||||
BaseDirectoryWrapper wrapped = wrapDirectory(random(), fsdir, bare);
|
||||
return wrapped;
|
||||
} catch (Exception e) {
|
||||
|
|
Loading…
Reference in New Issue