mirror of https://github.com/apache/lucene.git
LUCENE-8835: Respect file extension when listing files form FileSwitchDirectory (#700)
FileSwitchDirectory splits file actions between 2 directories based on file extensions. The extensions are respected on write operations like delete or create but ignored when we list the content of the directories. Until now we only deduplicated the contents on Directory#listAll which can cause inconsistencies and hard to debug errors due to double deletions in IndexWriter is a file is pending delete in one of the directories but still shows up in the directory listing form the other directory. This case can happen if both directories point to the same underlying FS directory which is a common use-case to split between mmap and NIOFS. This change filters out files from directories depending on their file extension to make sure files that are deleted in one directory are not returned form another if they point to the same FS directory.
This commit is contained in:
parent
abeecfacd4
commit
b6c68ccded
|
@ -74,6 +74,10 @@ Bug Fixes
|
|||
* LUCENE-8843: Don't ignore exceptions that are thrown when trying to open a
|
||||
file in IOUtils#fsync. (Jason Tedor via Adrien Grand)
|
||||
|
||||
* LUCENE-8835: FileSwitchDirectory now respects the file extension when listing directory
|
||||
contents to ensure we don't expose pending deletes if both directory point to the same
|
||||
underlying filesystem directory. (Simon Willnauer)
|
||||
|
||||
Improvements
|
||||
|
||||
* LUCENE-7840: Non-scoring BooleanQuery now removes SHOULD clauses before building the scorer supplier
|
||||
|
|
|
@ -86,7 +86,7 @@ public class FileSwitchDirectory extends Directory {
|
|||
|
||||
@Override
|
||||
public String[] listAll() throws IOException {
|
||||
Set<String> files = new HashSet<>();
|
||||
List<String> files = new ArrayList<>();
|
||||
// LUCENE-3380: either or both of our dirs could be FSDirs,
|
||||
// but if one underlying delegate is an FSDir and mkdirs() has not
|
||||
// yet been called, because so far everything is written to the other,
|
||||
|
@ -94,14 +94,24 @@ public class FileSwitchDirectory extends Directory {
|
|||
NoSuchFileException exc = null;
|
||||
try {
|
||||
for(String f : primaryDir.listAll()) {
|
||||
files.add(f);
|
||||
String ext = getExtension(f);
|
||||
// we should respect the extension here as well to ensure that we don't list a file that is already
|
||||
// deleted or rather in the one of the directories pending deletions if both directories point
|
||||
// to the same filesystem path. This is quite common for instance to use NIOFS as a primary
|
||||
// and MMap as a secondary to only mmap files like docvalues or term dictionaries.
|
||||
if (primaryExtensions.contains(ext)) {
|
||||
files.add(f);
|
||||
}
|
||||
}
|
||||
} catch (NoSuchFileException e) {
|
||||
exc = e;
|
||||
}
|
||||
try {
|
||||
for(String f : secondaryDir.listAll()) {
|
||||
files.add(f);
|
||||
String ext = getExtension(f);
|
||||
if (primaryExtensions.contains(ext) == false) {
|
||||
files.add(f);
|
||||
}
|
||||
}
|
||||
} catch (NoSuchFileException e) {
|
||||
// we got NoSuchFileException from both dirs
|
||||
|
|
|
@ -18,10 +18,14 @@ package org.apache.lucene.store;
|
|||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.nio.file.FileSystem;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter;
|
||||
|
@ -31,9 +35,10 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.TestIndexWriterReader;
|
||||
import org.apache.lucene.mockfile.FilterPath;
|
||||
import org.apache.lucene.mockfile.WindowsFS;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
// See: https://issues.apache.org/jira/browse/SOLR-12028 Tests cannot remove files on Windows machines occasionally
|
||||
public class TestFileSwitchDirectory extends BaseDirectoryTestCase {
|
||||
|
||||
/**
|
||||
|
@ -131,4 +136,30 @@ public class TestFileSwitchDirectory extends BaseDirectoryTestCase {
|
|||
}
|
||||
return newFSSwitchDirectory(extensions);
|
||||
}
|
||||
|
||||
public void testDeleteAndList() throws IOException {
|
||||
// relies on windows semantics
|
||||
Path path = createTempDir();
|
||||
FileSystem fs = new WindowsFS(path.getFileSystem()).getFileSystem(URI.create("file:///"));
|
||||
Path indexPath = new FilterPath(path, fs);
|
||||
try (final FileSwitchDirectory dir = new FileSwitchDirectory(Collections.singleton("tim"),
|
||||
new SimpleFSDirectory(indexPath), new SimpleFSDirectory(indexPath), true)) {
|
||||
dir.createOutput("foo.tim", IOContext.DEFAULT).close();
|
||||
Function<String[], Long> stripExtra = array -> Arrays.asList(array).stream()
|
||||
.filter(f -> f.startsWith("extra") == false).count();
|
||||
try (IndexInput indexInput = dir.openInput("foo.tim", IOContext.DEFAULT)) {
|
||||
dir.deleteFile("foo.tim");
|
||||
assertEquals(1, dir.getPrimaryDir().getPendingDeletions().size());
|
||||
assertEquals(1, dir.getPendingDeletions().size());
|
||||
assertEquals(0, stripExtra.apply(dir.listAll()).intValue());
|
||||
assertEquals(0, stripExtra.apply(dir.getPrimaryDir().listAll()).intValue());
|
||||
assertEquals(1, stripExtra.apply(dir.getSecondaryDir().listAll()).intValue());
|
||||
}
|
||||
assertEquals(0, dir.getPrimaryDir().getPendingDeletions().size());
|
||||
assertEquals(0, dir.getPendingDeletions().size());
|
||||
assertEquals(0, stripExtra.apply(dir.listAll()).intValue());
|
||||
assertEquals(0, stripExtra.apply(dir.getPrimaryDir().listAll()).intValue());
|
||||
assertEquals(0, stripExtra.apply(dir.getSecondaryDir().listAll()).intValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -91,6 +91,7 @@ import org.apache.lucene.store.ByteBuffersDirectory;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.store.FSLockFactory;
|
||||
import org.apache.lucene.store.FileSwitchDirectory;
|
||||
import org.apache.lucene.store.FlushInfo;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.LockFactory;
|
||||
|
@ -1414,6 +1415,13 @@ public abstract class LuceneTestCase extends Assert {
|
|||
}
|
||||
|
||||
Directory fsdir = newFSDirectoryImpl(clazz, f, lf);
|
||||
if (rarely()) {
|
||||
List<String> fileExtensions =
|
||||
Arrays.asList("fdt", "fdx", "tim", "tip", "si", "fnm", "pos", "dii", "dim", "nvm", "nvd", "dvm", "dvd");
|
||||
Collections.shuffle(fileExtensions, random());
|
||||
fileExtensions = fileExtensions.subList(0, 1 + random().nextInt(fileExtensions.size()));
|
||||
fsdir = new FileSwitchDirectory(new HashSet<>(fileExtensions), fsdir, newFSDirectoryImpl(clazz, f, lf), true);
|
||||
}
|
||||
BaseDirectoryWrapper wrapped = wrapDirectory(random(), fsdir, bare);
|
||||
return wrapped;
|
||||
} catch (Exception e) {
|
||||
|
|
Loading…
Reference in New Issue