HBASE-27590 Change Iterable to List in SnapshotFileCache (#4995)

Signed-off-by: Duo Zhang <zhangduo@apache.org>
This commit is contained in:
Peter Somogyi 2023-02-09 11:20:35 +01:00 committed by GitHub
parent 6a34aa8195
commit d2c5af11ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 13 additions and 7 deletions

View File

@ -178,15 +178,11 @@ public class SnapshotFileCache implements Stoppable {
* at that point, cache will still think the file system contains that file and return * at that point, cache will still think the file system contains that file and return
* <tt>true</tt>, even if it is no longer present (false positive). However, if the file never was * <tt>true</tt>, even if it is no longer present (false positive). However, if the file never was
* on the filesystem, we will never find it and always return <tt>false</tt>. * on the filesystem, we will never find it and always return <tt>false</tt>.
* @param files file to check, NOTE: Relies that files are loaded from hdfs before method is * @param files file to check
* called (NOT LAZY)
* @return <tt>unReferencedFiles</tt> the collection of files that do not have snapshot references * @return <tt>unReferencedFiles</tt> the collection of files that do not have snapshot references
* @throws IOException if there is an unexpected error reaching the filesystem. * @throws IOException if there is an unexpected error reaching the filesystem.
*/ */
// XXX this is inefficient to synchronize on the method, when what we really need to guard against public Iterable<FileStatus> getUnreferencedFiles(List<FileStatus> files,
// is an illegal access to the cache. Really we could do a mutex-guarded pointer swap on the
// cache, but that seems overkill at the moment and isn't necessarily a bottleneck.
public Iterable<FileStatus> getUnreferencedFiles(Iterable<FileStatus> files,
final SnapshotManager snapshotManager) throws IOException { final SnapshotManager snapshotManager) throws IOException {
List<FileStatus> unReferencedFiles = Lists.newArrayList(); List<FileStatus> unReferencedFiles = Lists.newArrayList();
List<String> snapshotsInProgress = null; List<String> snapshotsInProgress = null;

View File

@ -20,7 +20,10 @@ package org.apache.hadoop.hbase.master.snapshot;
import java.io.IOException; import java.io.IOException;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
@ -64,8 +67,15 @@ public class SnapshotHFileCleaner extends BaseHFileCleanerDelegate {
@Override @Override
public Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files) { public Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files) {
// The Iterable is lazily evaluated, so if we just pass this Iterable in, we will access the HFile
// storage inside the snapshot lock, which could take a lot of time (for example, several
// seconds), and block all other operations, especially other cleaners.
// So here we convert it to a List first, to force its evaluation before calling
// getUnreferencedFiles, so that we will not hold the snapshot lock for a long time.
List<FileStatus> filesList =
StreamSupport.stream(files.spliterator(), false).collect(Collectors.toList());
try { try {
return cache.getUnreferencedFiles(files, master.getSnapshotManager()); return cache.getUnreferencedFiles(filesList, master.getSnapshotManager());
} catch (CorruptedSnapshotException cse) { } catch (CorruptedSnapshotException cse) {
LOG.debug("Corrupted in-progress snapshot file exception, ignored ", cse); LOG.debug("Corrupted in-progress snapshot file exception, ignored ", cse);
} catch (IOException e) { } catch (IOException e) {