diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index ce793093575..f161f5a98f7 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -4617,8 +4617,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { * commits are no longer needed. Otherwise, those commits will * be deleted the next time commit() is called. */ - // nocommit remove this public synchronized void deleteUnusedFiles() throws IOException { + // TODO: should we remove this method now that it's the Directory's job to retry deletions? Except, for the super expert IDP use case + // it's still needed? ensureOpen(false); deleter.revisitPolicy(); } diff --git a/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java b/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java index fa6ce637771..da69acae08b 100644 --- a/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java +++ b/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java @@ -38,6 +38,7 @@ import java.util.List; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.index.IndexFileNames; @@ -129,7 +130,9 @@ public abstract class FSDirectory extends BaseDirectory { /** Maps files that we are trying to delete (or we tried already but failed) * before attempting to delete that key. */ - protected final Set pendingDeletes = Collections.newSetFromMap(new ConcurrentHashMap()); + private final Set pendingDeletes = Collections.newSetFromMap(new ConcurrentHashMap()); + + private final AtomicInteger opsSinceLastDelete = new AtomicInteger(); /** Used to generate temp file names in {@link #createTempOutput}. */ private final AtomicLong nextTempFileCounter = new AtomicLong(); @@ -241,17 +244,23 @@ public abstract class FSDirectory extends BaseDirectory { @Override public IndexOutput createOutput(String name, IOContext context) throws IOException { ensureOpen(); - // nocommit do we need to check pending deletes? - deletePendingFiles(); + + // If this file was pending delete, we are now bringing it back to life: + pendingDeletes.remove(name); + maybeDeletePendingFiles(); return new FSIndexOutput(name); } @Override public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) throws IOException { ensureOpen(); + maybeDeletePendingFiles(); while (true) { try { String name = IndexFileNames.segmentFileName(prefix, suffix + "_" + Long.toString(nextTempFileCounter.getAndIncrement(), Character.MAX_RADIX), "tmp"); + if (pendingDeletes.contains(name)) { + continue; + } return new FSIndexOutput(name, StandardOpenOption.CREATE, StandardOpenOption.WRITE, StandardOpenOption.CREATE_NEW); } catch (FileAlreadyExistsException faee) { @@ -261,7 +270,7 @@ public abstract class FSDirectory extends BaseDirectory { } protected void ensureCanRead(String name) throws IOException { - deletePendingFiles(); + maybeDeletePendingFiles(); if (pendingDeletes.contains(name)) { throw new NoSuchFileException("file \"" + name + "\" is pending delete and cannot be opened for read"); } @@ -270,6 +279,7 @@ public abstract class FSDirectory extends BaseDirectory { @Override public void sync(Collection names) throws IOException { ensureOpen(); + maybeDeletePendingFiles(); for (String name : names) { fsync(name); @@ -279,6 +289,7 @@ public abstract class FSDirectory extends BaseDirectory { @Override public void renameFile(String source, String dest) throws IOException { ensureOpen(); + maybeDeletePendingFiles(); Files.move(directory.resolve(source), directory.resolve(dest), StandardCopyOption.ATOMIC_MOVE); // TODO: should we move directory fsync to a separate 'syncMetadata' method? // for example, to improve listCommits(), IndexFileDeleter could also call that after deleting segments_Ns @@ -288,7 +299,7 @@ public abstract class FSDirectory extends BaseDirectory { @Override public synchronized void close() throws IOException { isOpen = false; - deletePendingFiles(); + maybeDeletePendingFiles(); } /** @return the underlying filesystem directory */ @@ -303,12 +314,48 @@ public abstract class FSDirectory extends BaseDirectory { } protected void fsync(String name) throws IOException { - deletePendingFiles(); IOUtils.fsync(directory.resolve(name), false); } @Override public void deleteFile(String name) throws IOException { + if (pendingDeletes.contains(name)) { + throw new NoSuchFileException("file \"" + name + "\" is already pending delete"); + } + privateDeleteFile(name); + } + + /** Tries to delete any pending deleted files, and returns true if + * there are still files that could not be deleted. */ + public boolean checkPendingDeletions() throws IOException { + deletePendingFiles(); + return pendingDeletes.isEmpty() == false; + } + + /** Try to delete any pending files that we had previously tried to delete but failed + * because we are on Windows and the files were still held open. */ + public void deletePendingFiles() throws IOException { + + // TODO: we could fix IndexInputs from FSDirectory subclasses to call this when they are closed? + + // Clone the set since we mutate it in privateDeleteFile: + for(String name : new HashSet<>(pendingDeletes)) { + privateDeleteFile(name); + } + } + + private void maybeDeletePendingFiles() throws IOException { + if (pendingDeletes.isEmpty() == false) { + // This is a silly heuristic to try to avoid O(N^2), where N = number of files pending deletion, behavior: + int count = opsSinceLastDelete.incrementAndGet(); + if (count >= pendingDeletes.size()) { + opsSinceLastDelete.addAndGet(-count); + deletePendingFiles(); + } + } + } + + private void privateDeleteFile(String name) throws IOException { try { Files.delete(directory.resolve(name)); pendingDeletes.remove(name); @@ -331,28 +378,6 @@ public abstract class FSDirectory extends BaseDirectory { } } - /** Tries to delete any pending deleted files, and returns true if - * there are still files that could not be deleted. */ - public boolean checkPendingDeletions() throws IOException { - deletePendingFiles(); - return pendingDeletes.isEmpty() == false; - } - - /** Try to delete any pending files that we had previously tried to delete but failed - * because we are on Windows and the files were still held open. */ - public void deletePendingFiles() throws IOException { - // nocommit do we need exponential backoff here for windows? - - // TODO: we could fix IndexInputs from FSDirectory subclasses to call this when they are closed? - - Set toDelete = new HashSet<>(pendingDeletes); - - // nocommit heroic exceptions here or not? - for(String name : toDelete) { - deleteFile(name); - } - } - final class FSIndexOutput extends OutputStreamIndexOutput { /** * The maximum chunk size is 8192 bytes, because file channel mallocs diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index 3b450368f24..a75d0324a95 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -75,15 +75,18 @@ import org.apache.lucene.store.BaseDirectoryWrapper; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.NIOFSDirectory; import org.apache.lucene.store.NoLockFactory; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.store.SimpleFSDirectory; import org.apache.lucene.store.SimpleFSLockFactory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.Constants; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.LuceneTestCase; @@ -2703,26 +2706,43 @@ public class TestIndexWriter extends LuceneTestCase { IOUtils.close(r, r2, w, dir); } - // nocommit turn test on once we have VirusCheckingFS - /* public void testWithPendingDeletions() throws Exception { - try (FSDirectory dir = FSDirectory.open(createTempDir())) { + // irony: currently we don't emulate windows well enough to work on windows! + assumeFalse("windows is not supported", Constants.WINDOWS); + + Path path = createTempDir(); + + // Use WindowsFS to prevent open files from being deleted: + FileSystem fs = new WindowsFS(path.getFileSystem()).getFileSystem(URI.create("file:///")); + Path root = new FilterPath(path, fs); + + // MMapDirectory doesn't work because it closes its file handles after mapping! + try (FSDirectory dir = new SimpleFSDirectory(root)) { IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); IndexWriter w = new IndexWriter(dir, iwc); w.commit(); - IndexInput in = dir.openInput("segments_0", IOContext.DEFAULT); + IndexInput in = dir.openInput("segments_1", IOContext.DEFAULT); w.addDocument(new Document()); w.close(); assertTrue(dir.checkPendingDeletions()); + + // make sure we get NFSF if we try to delete and already-pending-delete file: + try { + dir.deleteFile("segments_1"); + fail("didn't hit exception"); + } catch (NoSuchFileException nfse) { + // expected + } + iwc = new IndexWriterConfig(new MockAnalyzer(random())); try { w = new IndexWriter(dir, iwc); } catch (IllegalArgumentException iae) { assertEquals("Directory still has pending deleted files", iae.getMessage()); } + in.close(); } } - */ public void testLeftoverTempFiles() throws Exception { Directory dir = newDirectory(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java b/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java index 3c7941029af..b86c04329be 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java +++ b/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java @@ -742,7 +742,6 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper { System.out.println("\nNOTE: MockDirectoryWrapper: now run CheckIndex"); } - // nocommit: we should also confirm all prior segments_N are not corrupt? TestUtil.checkIndex(this, getCrossCheckTermVectorsOnClose(), true); // TODO: factor this out / share w/ TestIW.assertNoUnreferencedFiles