From f6efb58b0735fa19a93fa9acae05666cdba8997a Mon Sep 17 00:00:00 2001 From: Stephen O'Donnell Date: Mon, 26 Apr 2021 11:00:23 +0100 Subject: [PATCH] HDFS-15621. Datanode DirectoryScanner uses excessive memory (#2849). Contributed by Stephen O'Donnell (cherry picked from commit 605ed85c291a6250b077da32a49dbb35f3b78bf7) Conflicts: hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java --- .../server/datanode/DirectoryScanner.java | 2 +- .../datanode/fsdataset/FsVolumeSpi.java | 118 ++++++++---------- .../datanode/fsdataset/impl/FsVolumeImpl.java | 5 +- .../server/datanode/TestDirectoryScanner.java | 37 +++--- 4 files changed, 75 insertions(+), 87 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java index be7e47a73e3..b48d6ce9ed0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java @@ -582,7 +582,7 @@ public class DirectoryScanner implements Runnable { long blockId, FsVolumeSpi vol) { statsRecord.missingBlockFile++; statsRecord.missingMetaFile++; - diffRecord.add(new ScanInfo(blockId, null, null, vol)); + diffRecord.add(new ScanInfo(blockId, null, null, null, vol)); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java index be978d75e9a..c1043aee176 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java @@ -224,27 +224,27 @@ public interface FsVolumeSpi */ public static class ScanInfo implements Comparable { private final long blockId; - /** - * The block file path, relative to the volume's base directory. - * If there was no block file found, this may be null. If 'vol' - * is null, then this is the full path of the block file. + * The full path to the folder containing the block / meta files. */ - private final String blockSuffix; - + private final File basePath; /** - * The suffix of the meta file path relative to the block file. - * If blockSuffix is null, then this will be the entire path relative - * to the volume base directory, or an absolute path if vol is also - * null. + * The block file name, with no path */ - private final String metaSuffix; + private final String blockFile; + /** + * Holds the meta file name, with no path, only if blockFile is null. + * If blockFile is not null, the meta file will be named identically to + * the blockFile, but with a suffix like "_1234.meta". If the blockFile + * is present, we store only the meta file suffix. + */ + private final String metaFile; private final FsVolumeSpi volume; private final FileRegion fileRegion; /** - * Get the file's length in async block scan + * Get the file's length in async block scan. */ private final long blockLength; @@ -254,35 +254,19 @@ public interface FsVolumeSpi private final static String QUOTED_FILE_SEPARATOR = Matcher.quoteReplacement(File.separator); - /** - * Get the most condensed version of the path. - * - * For example, the condensed version of /foo//bar is /foo/bar - * Unlike {@link File#getCanonicalPath()}, this will never perform I/O - * on the filesystem. - * - * @param path the path to condense - * @return the condensed path - */ - private static String getCondensedPath(String path) { - return CONDENSED_PATH_REGEX.matcher(path). - replaceAll(QUOTED_FILE_SEPARATOR); - } - /** * Get a path suffix. * - * @param f The file to get the suffix for. + * @param f The string to get the suffix for. * @param prefix The prefix we're stripping off. * - * @return A suffix such that prefix + suffix = path to f + * @return A suffix such that prefix + suffix = f */ - private static String getSuffix(File f, String prefix) { - String fullPath = getCondensedPath(f.getAbsolutePath()); - if (fullPath.startsWith(prefix)) { - return fullPath.substring(prefix.length()); + private static String getSuffix(String f, String prefix) { + if (f.startsWith(prefix)) { + return f.substring(prefix.length()); } - throw new RuntimeException(prefix + " is not a prefix of " + fullPath); + throw new RuntimeException(prefix + " is not a prefix of " + f); } /** @@ -290,27 +274,27 @@ public interface FsVolumeSpi * the block data and meta-data files. * * @param blockId the block ID - * @param blockFile the path to the block data file - * @param metaFile the path to the block meta-data file + * @param basePath The full path to the directory the block is stored in + * @param blockFile The block filename, with no path + * @param metaFile The meta filename, with no path. If blockFile is not null + * then the metaFile and blockFile should have the same + * prefix, with the meta file having a suffix like + * "_1234.meta". To save memory, if the blockFile is present + * we store only the meta file suffix in the object * @param vol the volume that contains the block */ - public ScanInfo(long blockId, File blockFile, File metaFile, - FsVolumeSpi vol) { + public ScanInfo(long blockId, File basePath, String blockFile, + String metaFile, FsVolumeSpi vol) { this.blockId = blockId; - String condensedVolPath = - (vol == null || vol.getBaseURI() == null) ? null : - getCondensedPath(new File(vol.getBaseURI()).getAbsolutePath()); - this.blockSuffix = blockFile == null ? null : - getSuffix(blockFile, condensedVolPath); - this.blockLength = (blockFile != null) ? blockFile.length() : 0; - if (metaFile == null) { - this.metaSuffix = null; - } else if (blockFile == null) { - this.metaSuffix = getSuffix(metaFile, condensedVolPath); + this.basePath = basePath; + this.blockFile = blockFile; + if (blockFile != null && metaFile != null) { + this.metaFile = getSuffix(metaFile, blockFile); } else { - this.metaSuffix = getSuffix(metaFile, - condensedVolPath + blockSuffix); + this.metaFile = metaFile; } + this.blockLength = (blockFile != null) ? + new File(basePath, blockFile).length() : 0; this.volume = vol; this.fileRegion = null; } @@ -330,8 +314,9 @@ public interface FsVolumeSpi this.blockLength = length; this.volume = vol; this.fileRegion = fileRegion; - this.blockSuffix = null; - this.metaSuffix = null; + this.basePath = null; + this.blockFile = null; + this.metaFile = null; } /** @@ -340,8 +325,8 @@ public interface FsVolumeSpi * @return the block data file */ public File getBlockFile() { - return (blockSuffix == null) ? null : - new File(new File(volume.getBaseURI()).getAbsolutePath(), blockSuffix); + return (blockFile == null) ? null : + new File(basePath.getAbsolutePath(), blockFile); } /** @@ -360,15 +345,10 @@ public interface FsVolumeSpi * @return the block meta data file */ public File getMetaFile() { - if (metaSuffix == null) { + if (metaFile == null) { return null; } - String fileSuffix = metaSuffix; - if (blockSuffix != null) { - fileSuffix = blockSuffix + metaSuffix; - } - return new File(new File(volume.getBaseURI()).getAbsolutePath(), - fileSuffix); + return new File(basePath.getAbsolutePath(), fullMetaFile()); } /** @@ -411,14 +391,24 @@ public interface FsVolumeSpi } public long getGenStamp() { - return metaSuffix != null ? Block.getGenerationStamp( - getMetaFile().getName()) : - HdfsConstants.GRANDFATHER_GENERATION_STAMP; + return metaFile != null ? Block.getGenerationStamp(fullMetaFile()) + : HdfsConstants.GRANDFATHER_GENERATION_STAMP; } public FileRegion getFileRegion() { return fileRegion; } + + private String fullMetaFile() { + if (metaFile == null) { + return null; + } + if (blockFile == null) { + return metaFile; + } else { + return blockFile + metaFile; + } + } } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java index e83b7c9d0e0..15048efc729 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java @@ -1392,7 +1392,7 @@ public class FsVolumeImpl implements FsVolumeSpi { long blockId = Block.getBlockId(file.getName()); verifyFileLocation(file, bpFinalizedDir, blockId); - report.add(new ScanInfo(blockId, null, file, this)); + report.add(new ScanInfo(blockId, dir, null, fileNames.get(i), this)); } continue; } @@ -1415,7 +1415,8 @@ public class FsVolumeImpl implements FsVolumeSpi { } } verifyFileLocation(blockFile, bpFinalizedDir, blockId); - report.add(new ScanInfo(blockId, blockFile, metaFile, this)); + report.add(new ScanInfo(blockId, dir, blockFile.getName(), + metaFile == null ? null : metaFile.getName(), this)); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java index 25470018a96..f80ad5bce0a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java @@ -1021,19 +1021,21 @@ public class TestDirectoryScanner { private final static String BPID_2 = "BP-367845636-127.0.0.1-5895645674231"; - void testScanInfoObject(long blockId, File blockFile, File metaFile) + void testScanInfoObject(long blockId, File baseDir, String blockFile, + String metaFile) throws Exception { FsVolumeSpi.ScanInfo scanInfo = - new FsVolumeSpi.ScanInfo(blockId, blockFile, metaFile, TEST_VOLUME); + new FsVolumeSpi.ScanInfo(blockId, baseDir, blockFile, metaFile, + TEST_VOLUME); assertEquals(blockId, scanInfo.getBlockId()); if (blockFile != null) { - assertEquals(blockFile.getAbsolutePath(), + assertEquals(new File(baseDir, blockFile).getAbsolutePath(), scanInfo.getBlockFile().getAbsolutePath()); } else { assertNull(scanInfo.getBlockFile()); } if (metaFile != null) { - assertEquals(metaFile.getAbsolutePath(), + assertEquals(new File(baseDir, metaFile).getAbsolutePath(), scanInfo.getMetaFile().getAbsolutePath()); } else { assertNull(scanInfo.getMetaFile()); @@ -1043,7 +1045,7 @@ public class TestDirectoryScanner { void testScanInfoObject(long blockId) throws Exception { FsVolumeSpi.ScanInfo scanInfo = - new FsVolumeSpi.ScanInfo(blockId, null, null, null); + new FsVolumeSpi.ScanInfo(blockId, null, null, null, null); assertEquals(blockId, scanInfo.getBlockId()); assertNull(scanInfo.getBlockFile()); assertNull(scanInfo.getMetaFile()); @@ -1052,24 +1054,19 @@ public class TestDirectoryScanner { @Test(timeout = 120000) public void TestScanInfo() throws Exception { testScanInfoObject(123, - new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(), - "blk_123"), - new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(), - "blk_123__1001.meta")); + new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()), + "blk_123", "blk_123__1001.meta"); testScanInfoObject(464, - new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(), - "blk_123"), - null); - testScanInfoObject(523, null, - new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(), - "blk_123__1009.meta")); - testScanInfoObject(789, null, null); + new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()), + "blk_123", null); + testScanInfoObject(523, + new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()), + null, "blk_123__1009.meta"); + testScanInfoObject(789, null, null, null); testScanInfoObject(456); testScanInfoObject(123, - new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(), - "blk_567"), - new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(), - "blk_567__1004.meta")); + new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath()), + "blk_567", "blk_567__1004.meta"); } /**