Refactor part of IndexFileDeleter and ReplicaFileDeleter into a common utility class (#12126)

This commit is contained in:
Patrick Zhai 2023-03-15 20:51:49 -07:00 committed by GitHub
parent f324204019
commit d3b6ef3c86
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 343 additions and 298 deletions

View File

@ -146,6 +146,9 @@ Improvements
* GITHUB#12166: Remove the now unused class pointInPolygon. (Marcus Eagan via Christine Poerschke and Nick Knize)
* GITHUB#12126: Refactor part of IndexFileDeleter and ReplicaFileDeleter into a public common utility class
FileDeleter. (Patrick Zhai)
Optimizations
---------------------

View File

@ -17,9 +17,7 @@
package org.apache.lucene.index;
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.NoSuchFileException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@ -35,7 +33,7 @@ import java.util.regex.Matcher;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.FileDeleter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InfoStream;
@ -66,11 +64,6 @@ import org.apache.lucene.util.InfoStream;
*/
final class IndexFileDeleter implements Closeable {
/* Reference count for all files in the index.
* Counts how many existing commits reference a file.
**/
private Map<String, RefCount> refCounts = new HashMap<>();
/* Holds all commits (segments_N) currently in the index.
* This will have just 1 commit if you are using the
* default delete policy (KeepOnlyLastCommitDeletionPolicy).
@ -96,6 +89,8 @@ final class IndexFileDeleter implements Closeable {
/** Change to true to see details of reference counts when infoStream is enabled */
public static boolean VERBOSE_REF_COUNTS = false;
private final FileDeleter fileDeleter;
private final IndexWriter writer;
// called only from assert
@ -140,6 +135,8 @@ final class IndexFileDeleter implements Closeable {
this.directoryOrig = directoryOrig;
this.directory = directory;
this.fileDeleter = new FileDeleter(directory, this::logInfo);
// First pass: walk the files and initialize our ref
// counts:
CommitPoint currentCommitPoint = null;
@ -154,7 +151,7 @@ final class IndexFileDeleter implements Closeable {
|| fileName.startsWith(IndexFileNames.PENDING_SEGMENTS))) {
// Add this file to refCounts with initial count 0:
getRefCount(fileName);
fileDeleter.initRefCount(fileName);
if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
@ -214,7 +211,7 @@ final class IndexFileDeleter implements Closeable {
// We keep commits list in sorted order (oldest to newest):
CollectionUtil.timSort(commits);
Collection<String> relevantFiles = new HashSet<>(refCounts.keySet());
Collection<String> relevantFiles = new HashSet<>(fileDeleter.getAllFiles());
Set<String> pendingDeletions = directoryOrig.getPendingDeletions();
if (pendingDeletions.isEmpty() == false) {
relevantFiles.addAll(pendingDeletions);
@ -225,24 +222,18 @@ final class IndexFileDeleter implements Closeable {
// Now delete anything with ref count at 0. These are
// presumably abandoned files eg due to crash of
// IndexWriter.
Set<String> toDelete = new HashSet<>();
for (Map.Entry<String, RefCount> entry : refCounts.entrySet()) {
RefCount rc = entry.getValue();
final String fileName = entry.getKey();
if (0 == rc.count) {
// A segments_N file should never have ref count 0 on init:
if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
throw new IllegalStateException(
"file \"" + fileName + "\" has refCount=0, which should never happen on init");
}
if (infoStream.isEnabled("IFD")) {
infoStream.message("IFD", "init: removing unreferenced file \"" + fileName + "\"");
}
toDelete.add(fileName);
Set<String> toDelete = fileDeleter.getUnrefedFiles();
for (String fileName : toDelete) {
if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
throw new IllegalStateException(
"file \"" + fileName + "\" has refCount=0, which should never happen on init");
}
if (infoStream.isEnabled("IFD")) {
infoStream.message("IFD", "init: removing unreferenced file \"" + fileName + "\"");
}
}
deleteFiles(toDelete);
fileDeleter.deleteFilesIfNoRef(toDelete);
// Finally, give policy a chance to remove things on
// startup:
@ -484,7 +475,7 @@ final class IndexFileDeleter implements Closeable {
String fileName = files[i];
m.reset(fileName);
if (!fileName.endsWith("write.lock")
&& !refCounts.containsKey(fileName)
&& fileDeleter.exists(fileName) == false
&& (m.matches()
|| fileName.startsWith(IndexFileNames.SEGMENTS)
// we only try to clear out pending_segments_N during rollback(), because we don't
@ -502,7 +493,7 @@ final class IndexFileDeleter implements Closeable {
}
}
deleteFiles(toDelete);
fileDeleter.deleteFilesIfNoRef(toDelete);
}
@Override
@ -610,76 +601,34 @@ final class IndexFileDeleter implements Closeable {
}
}
private void logInfo(FileDeleter.MsgType msgType, String msg) {
if (msgType == FileDeleter.MsgType.REF && VERBOSE_REF_COUNTS == false) {
// do not log anything
} else {
if (infoStream.isEnabled("IFD")) {
infoStream.message("IFD", msg);
}
}
}
void incRef(SegmentInfos segmentInfos, boolean isCommit) throws IOException {
assert locked();
// If this is a commit point, also incRef the
// segments_N file:
for (final String fileName : segmentInfos.files(isCommit)) {
incRef(fileName);
fileDeleter.incRef(fileName);
}
}
void incRef(Collection<String> files) {
assert locked();
for (final String file : files) {
incRef(file);
}
}
void incRef(String fileName) {
assert locked();
RefCount rc = getRefCount(fileName);
if (infoStream.isEnabled("IFD")) {
if (VERBOSE_REF_COUNTS) {
infoStream.message("IFD", " IncRef \"" + fileName + "\": pre-incr count is " + rc.count);
}
}
rc.IncRef();
fileDeleter.incRef(files);
}
/** Decrefs all provided files, even on exception; throws first exception hit, if any. */
void decRef(Collection<String> files) throws IOException {
assert locked();
Set<String> toDelete = new HashSet<>();
Throwable firstThrowable = null;
for (final String file : files) {
try {
if (decRef(file)) {
toDelete.add(file);
}
} catch (Throwable t) {
firstThrowable = IOUtils.useOrSuppress(firstThrowable, t);
}
}
try {
deleteFiles(toDelete);
} catch (Throwable t) {
firstThrowable = IOUtils.useOrSuppress(firstThrowable, t);
}
if (firstThrowable != null) {
throw IOUtils.rethrowAlways(firstThrowable);
}
}
/** Returns true if the file should now be deleted. */
private boolean decRef(String fileName) {
assert locked();
RefCount rc = getRefCount(fileName);
if (infoStream.isEnabled("IFD")) {
if (VERBOSE_REF_COUNTS) {
infoStream.message("IFD", " DecRef \"" + fileName + "\": pre-decr count is " + rc.count);
}
}
if (rc.DecRef() == 0) {
// This file is no longer referenced by any past
// commit points nor by the in-memory SegmentInfos:
refCounts.remove(fileName);
return true;
} else {
return false;
}
fileDeleter.decRef(files);
}
void decRef(SegmentInfos segmentInfos) throws IOException {
@ -689,128 +638,13 @@ final class IndexFileDeleter implements Closeable {
public boolean exists(String fileName) {
assert locked();
if (!refCounts.containsKey(fileName)) {
return false;
} else {
return getRefCount(fileName).count > 0;
}
}
private RefCount getRefCount(String fileName) {
assert locked();
RefCount rc;
if (!refCounts.containsKey(fileName)) {
rc = new RefCount(fileName);
refCounts.put(fileName, rc);
} else {
rc = refCounts.get(fileName);
}
return rc;
return fileDeleter.exists(fileName);
}
/** Deletes the specified files, but only if they are new (have not yet been incref'd). */
void deleteNewFiles(Collection<String> files) throws IOException {
assert locked();
Set<String> toDelete = new HashSet<>();
for (final String fileName : files) {
// NOTE: it's very unusual yet possible for the
// refCount to be present and 0: it can happen if you
// open IW on a crashed index, and it removes a bunch
// of unref'd files, and then you add new docs / do
// merging, and it reuses that segment name.
// TestCrash.testCrashAfterReopen can hit this:
if (!refCounts.containsKey(fileName) || refCounts.get(fileName).count == 0) {
if (infoStream.isEnabled("IFD")) {
infoStream.message("IFD", "will delete new file \"" + fileName + "\"");
}
toDelete.add(fileName);
}
}
deleteFiles(toDelete);
}
private void deleteFiles(Collection<String> names) throws IOException {
assert locked();
ensureOpen();
if (infoStream.isEnabled("IFD")) {
if (names.size() > 0) {
infoStream.message("IFD", "delete " + names + "");
}
}
// We make two passes, first deleting any segments_N files, second deleting the rest. We do
// this so that if we throw exc or JVM
// crashes during deletions, even when not on Windows, we don't leave the index in an
// "apparently corrupt" state:
for (String name : names) {
if (name.startsWith(IndexFileNames.SEGMENTS) == false) {
continue;
}
deleteFile(name);
}
for (String name : names) {
if (name.startsWith(IndexFileNames.SEGMENTS) == true) {
continue;
}
deleteFile(name);
}
}
private void deleteFile(String fileName) throws IOException {
try {
directory.deleteFile(fileName);
} catch (NoSuchFileException | FileNotFoundException e) {
if (Constants.WINDOWS) {
// TODO: can we remove this OS-specific hacky logic? If windows deleteFile is buggy, we
// should instead contain this workaround in
// a WindowsFSDirectory ...
// LUCENE-6684: we suppress this assert for Windows, since a file could be in a confusing
// "pending delete" state, where we already
// deleted it once, yet it still shows up in directory listings, and if you try to delete it
// again you'll hit NSFE/FNFE:
} else {
throw e;
}
}
}
/** Tracks the reference count for a single index file: */
private static final class RefCount {
// fileName used only for better assert error messages
final String fileName;
boolean initDone;
RefCount(String fileName) {
this.fileName = fileName;
}
int count;
public int IncRef() {
if (!initDone) {
initDone = true;
} else {
assert count > 0
: Thread.currentThread().getName()
+ ": RefCount is 0 pre-increment for file \""
+ fileName
+ "\"";
}
return ++count;
}
public int DecRef() {
assert count > 0
: Thread.currentThread().getName()
+ ": RefCount is 0 pre-decrement for file \""
+ fileName
+ "\"";
return --count;
}
fileDeleter.deleteFilesIfNoRef(files);
}
/**

View File

@ -0,0 +1,285 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.NoSuchFileException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.function.BiConsumer;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.store.Directory;
/**
* This class provides ability to track the reference counts of a set of index files and delete them
* when their counts decreased to 0.
*
* <p>This class is NOT thread-safe, the user should make sure the thread-safety themselves
*
* @lucene.internal
*/
public final class FileDeleter {
private final Map<String, RefCount> refCounts = new HashMap<>();
private final Directory directory;
/*
* user specified message consumer, first argument will be message type
* second argument will be the actual message
*/
private final BiConsumer<MsgType, String> messenger;
/*
* used to return 0 ref count
*/
private static final RefCount ZERO_REF = new RefCount("");
/**
* Create a new FileDeleter with a messenger consumes various verbose messages
*
* @param directory the index directory
* @param messenger two arguments will be passed in, {@link MsgType} and the actual message in
* String. Can be null if the user do not want debug infos
*/
public FileDeleter(Directory directory, BiConsumer<MsgType, String> messenger) {
this.directory = directory;
this.messenger = messenger;
}
/**
* Types of messages this file deleter will broadcast REF: messages about reference FILE: messages
* about file
*/
public enum MsgType {
REF,
FILE
}
public void incRef(Collection<String> fileNames) {
for (String file : fileNames) {
incRef(file);
}
}
public void incRef(String fileName) {
RefCount rc = getRefCountInternal(fileName);
if (messenger != null) {
messenger.accept(MsgType.REF, "IncRef \"" + fileName + "\": pre-incr count is " + rc.count);
}
rc.incRef();
}
/**
* Decrease ref counts for all provided files, delete them if ref counts down to 0, even on
* exception. Throw first exception hit, if any
*/
public void decRef(Collection<String> fileNames) throws IOException {
Set<String> toDelete = new HashSet<>();
Throwable firstThrowable = null;
for (String fileName : fileNames) {
try {
if (decRef(fileName)) {
toDelete.add(fileName);
}
} catch (Throwable t) {
firstThrowable = IOUtils.useOrSuppress(firstThrowable, t);
}
}
try {
delete(toDelete);
} catch (Throwable t) {
firstThrowable = IOUtils.useOrSuppress(firstThrowable, t);
}
if (firstThrowable != null) {
throw IOUtils.rethrowAlways(firstThrowable);
}
}
/** Returns true if the file should be deleted */
private boolean decRef(String fileName) {
RefCount rc = getRefCountInternal(fileName);
if (messenger != null) {
messenger.accept(MsgType.REF, "DecRef \"" + fileName + "\": pre-decr count is " + rc.count);
}
if (rc.decRef() == 0) {
refCounts.remove(fileName);
return true;
}
return false;
}
private RefCount getRefCountInternal(String fileName) {
return refCounts.computeIfAbsent(fileName, RefCount::new);
}
/** if the file is not yet recorded, this method will create a new RefCount object with count 0 */
public void initRefCount(String fileName) {
refCounts.computeIfAbsent(fileName, RefCount::new);
}
/**
* get ref count for a provided file, if the file is not yet recorded, this method will return 0
*/
public int getRefCount(String fileName) {
return refCounts.getOrDefault(fileName, ZERO_REF).count;
}
/** get all files, some of them may have ref count 0 */
public Set<String> getAllFiles() {
return refCounts.keySet();
}
/** return true only if file is touched and also has larger than 0 ref count */
public boolean exists(String fileName) {
return refCounts.containsKey(fileName) && refCounts.get(fileName).count > 0;
}
/** get files that are touched but not incref'ed */
public Set<String> getUnrefedFiles() {
Set<String> unrefed = new HashSet<>();
for (var entry : refCounts.entrySet()) {
RefCount rc = entry.getValue();
String fileName = entry.getKey();
if (rc.count == 0) {
messenger.accept(MsgType.FILE, "removing unreferenced file \"" + fileName + "\"");
unrefed.add(fileName);
}
}
return unrefed;
}
/** delete only files that are unref'ed */
public void deleteFilesIfNoRef(Collection<String> files) throws IOException {
Set<String> toDelete = new HashSet<>();
for (final String fileName : files) {
// NOTE: it's very unusual yet possible for the
// refCount to be present and 0: it can happen if you
// open IW on a crashed index, and it removes a bunch
// of unref'd files, and then you add new docs / do
// merging, and it reuses that segment name.
// TestCrash.testCrashAfterReopen can hit this:
if (exists(fileName) == false) {
if (messenger != null) {
messenger.accept(MsgType.FILE, "will delete new file \"" + fileName + "\"");
}
toDelete.add(fileName);
}
}
delete(toDelete);
}
public void forceDelete(String fileName) throws IOException {
refCounts.remove(fileName);
delete(fileName);
}
public void deleteFileIfNoRef(String fileName) throws IOException {
if (exists(fileName) == false) {
if (messenger != null) {
messenger.accept(MsgType.FILE, "will delete new file \"" + fileName + "\"");
}
delete(fileName);
}
}
private void delete(Collection<String> toDelete) throws IOException {
if (messenger != null) {
messenger.accept(MsgType.FILE, "now delete " + toDelete.size() + " files: " + toDelete);
}
// First pass: delete any segments_N files. We do these first to be certain stale commit points
// are removed
// before we remove any files they reference, in case we crash right now:
for (String fileName : toDelete) {
assert exists(fileName) == false;
if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
delete(fileName);
}
}
// Only delete other files if we were able to remove the segments_N files; this way we never
// leave a corrupt commit in the index even in the presense of virus checkers:
for (String fileName : toDelete) {
assert exists(fileName) == false;
if (fileName.startsWith(IndexFileNames.SEGMENTS) == false) {
delete(fileName);
}
}
}
private void delete(String fileName) throws IOException {
try {
directory.deleteFile(fileName);
} catch (NoSuchFileException | FileNotFoundException e) {
if (Constants.WINDOWS) {
// TODO: can we remove this OS-specific hacky logic? If windows deleteFile is buggy, we
// should instead contain this workaround in
// a WindowsFSDirectory ...
// LUCENE-6684: we suppress this assert for Windows, since a file could be in a confusing
// "pending delete" state, where we already
// deleted it once, yet it still shows up in directory listings, and if you try to delete it
// again you'll hit NSFE/FNFE:
} else {
throw e;
}
}
}
/** Tracks the reference count for a single index file: */
public static final class RefCount {
// fileName used only for better assert error messages
final String fileName;
boolean initDone;
RefCount(String fileName) {
this.fileName = fileName;
}
int count;
public int incRef() {
if (initDone == false) {
initDone = true;
} else {
assert count > 0
: Thread.currentThread().getName()
+ ": RefCount is 0 pre-increment for file \""
+ fileName
+ "\"";
}
return ++count;
}
public int decRef() {
assert count > 0
: Thread.currentThread().getName()
+ ": RefCount is 0 pre-decrement for file \""
+ fileName
+ "\"";
return --count;
}
}
}

View File

@ -206,7 +206,7 @@ public abstract class CopyJob implements Comparable<CopyJob> {
if (Node.VERBOSE_FILES) {
dest.message("remove partial file " + prevJob.current.tmpName);
}
dest.deleter.deleteNewFile(prevJob.current.tmpName);
dest.deleter.forceDeleteFile(prevJob.current.tmpName);
prevJob.current = null;
}
}
@ -252,7 +252,7 @@ public abstract class CopyJob implements Comparable<CopyJob> {
if (Node.VERBOSE_FILES) {
dest.message("remove partial file " + current.tmpName);
}
dest.deleter.deleteNewFile(current.tmpName);
dest.deleter.forceDeleteFile(current.tmpName);
current = null;
}
}

View File

@ -17,72 +17,36 @@
package org.apache.lucene.replicator.nrt;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.NoSuchFileException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
// TODO: can we factor/share with IFD: this is doing exactly the same thing, but on the replica side
import org.apache.lucene.util.FileDeleter;
class ReplicaFileDeleter {
private final Map<String, Integer> refCounts = new HashMap<String, Integer>();
private final FileDeleter fileDeleter;
private final Directory dir;
private final Node node;
public ReplicaFileDeleter(Node node, Directory dir) throws IOException {
this.dir = dir;
this.node = node;
}
/**
* Used only by asserts: returns true if the file exists (can be opened), false if it cannot be
* opened, and (unlike Java's File.exists) throws IOException if there's some unexpected error.
*/
private static boolean slowFileExists(Directory dir, String fileName) throws IOException {
try {
dir.openInput(fileName, IOContext.DEFAULT).close();
return true;
} catch (@SuppressWarnings("unused") NoSuchFileException | FileNotFoundException e) {
return false;
}
this.fileDeleter =
new FileDeleter(
dir,
((msgType, s) -> {
if (msgType == FileDeleter.MsgType.FILE && Node.VERBOSE_FILES) {
node.message(s);
}
}));
}
public synchronized void incRef(Collection<String> fileNames) throws IOException {
for (String fileName : fileNames) {
assert slowFileExists(dir, fileName) : "file " + fileName + " does not exist!";
Integer curCount = refCounts.get(fileName);
if (curCount == null) {
refCounts.put(fileName, 1);
} else {
refCounts.put(fileName, curCount.intValue() + 1);
}
}
fileDeleter.incRef(fileNames);
}
public synchronized void decRef(Collection<String> fileNames) throws IOException {
Set<String> toDelete = new HashSet<>();
for (String fileName : fileNames) {
Integer curCount = refCounts.get(fileName);
assert curCount != null : "fileName=" + fileName;
assert curCount.intValue() > 0;
if (curCount.intValue() == 1) {
refCounts.remove(fileName);
toDelete.add(fileName);
} else {
refCounts.put(fileName, curCount.intValue() - 1);
}
}
delete(toDelete);
fileDeleter.decRef(fileNames);
// TODO: this local IR could incRef files here, like we do now with IW's NRT readers ... then we
// can assert this again:
@ -100,69 +64,28 @@ class ReplicaFileDeleter {
*/
}
private synchronized void delete(Collection<String> toDelete) throws IOException {
if (Node.VERBOSE_FILES) {
node.message("now delete " + toDelete.size() + " files: " + toDelete);
}
// First pass: delete any segments_N files. We do these first to be certain stale commit points
// are removed
// before we remove any files they reference, in case we crash right now:
for (String fileName : toDelete) {
assert refCounts.containsKey(fileName) == false;
if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
delete(fileName);
}
}
// Only delete other files if we were able to remove the segments_N files; this way we never
// leave a corrupt commit in the index even in the presense of virus checkers:
for (String fileName : toDelete) {
assert refCounts.containsKey(fileName) == false;
if (fileName.startsWith(IndexFileNames.SEGMENTS) == false) {
delete(fileName);
}
}
}
private synchronized void delete(String fileName) throws IOException {
if (Node.VERBOSE_FILES) {
node.message("file " + fileName + ": now delete");
}
dir.deleteFile(fileName);
}
public synchronized Integer getRefCount(String fileName) {
return refCounts.get(fileName);
public synchronized int getRefCount(String fileName) {
return fileDeleter.getRefCount(fileName);
}
public synchronized void deleteIfNoRef(String fileName) throws IOException {
if (refCounts.containsKey(fileName) == false) {
deleteNewFile(fileName);
}
fileDeleter.deleteFileIfNoRef(fileName);
}
public synchronized void deleteNewFile(String fileName) throws IOException {
delete(fileName);
public synchronized void forceDeleteFile(String fileName) throws IOException {
fileDeleter.forceDelete(fileName);
}
/*
public synchronized Set<String> getPending() {
return new HashSet<String>(pending);
}
*/
public synchronized void deleteUnknownFiles(String segmentsFileName) throws IOException {
Set<String> toDelete = new HashSet<>();
Set<String> toDelete = fileDeleter.getUnrefedFiles();
for (String fileName : dir.listAll()) {
if (refCounts.containsKey(fileName) == false
if (fileDeleter.exists(fileName) == false
&& fileName.equals("write.lock") == false
&& fileName.equals(segmentsFileName) == false) {
node.message("will delete unknown file \"" + fileName + "\"");
toDelete.add(fileName);
}
}
delete(toDelete);
fileDeleter.deleteFilesIfNoRef(toDelete);
}
}