HDFS-15621. Datanode DirectoryScanner uses excessive memory (#2849). Contributed by Stephen O'Donnell

This commit is contained in:
Stephen O'Donnell 2021-04-26 11:00:23 +01:00 committed by GitHub
parent b968fa0957
commit 605ed85c29
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 77 additions and 89 deletions

View File

@ -582,7 +582,7 @@ public class DirectoryScanner implements Runnable {
long blockId, FsVolumeSpi vol) { long blockId, FsVolumeSpi vol) {
statsRecord.missingBlockFile++; statsRecord.missingBlockFile++;
statsRecord.missingMetaFile++; statsRecord.missingMetaFile++;
diffRecord.add(new ScanInfo(blockId, null, null, vol)); diffRecord.add(new ScanInfo(blockId, null, null, null, vol));
} }
/** /**

View File

@ -227,27 +227,27 @@ public interface FsVolumeSpi
*/ */
public static class ScanInfo implements Comparable<ScanInfo> { public static class ScanInfo implements Comparable<ScanInfo> {
private final long blockId; private final long blockId;
/** /**
* The block file path, relative to the volume's base directory. * The full path to the folder containing the block / meta files.
* If there was no block file found, this may be null. If 'vol'
* is null, then this is the full path of the block file.
*/ */
private final String blockSuffix; private final File basePath;
/** /**
* The suffix of the meta file path relative to the block file. * The block file name, with no path.
* If blockSuffix is null, then this will be the entire path relative
* to the volume base directory, or an absolute path if vol is also
* null.
*/ */
private final String metaSuffix; private final String blockFile;
/**
* Holds either the meta file name or only its suffix, never a path.
* If blockFile is null, this is the complete meta file name. If blockFile
* is not null, the meta file is named identically to the blockFile but
* with a suffix like "_1234.meta", and only that suffix is stored here.
* This is null if there is no meta file.
*/
private final String metaFile;
private final FsVolumeSpi volume; private final FsVolumeSpi volume;
private final FileRegion fileRegion; private final FileRegion fileRegion;
/** /**
* Get the file's length in async block scan * Get the file's length in async block scan.
*/ */
private final long blockLength; private final long blockLength;
@ -257,35 +257,19 @@ public interface FsVolumeSpi
private final static String QUOTED_FILE_SEPARATOR = private final static String QUOTED_FILE_SEPARATOR =
Matcher.quoteReplacement(File.separator); Matcher.quoteReplacement(File.separator);
/**
 * Reduce a path string to its most condensed form.
 *
 * For example, /foo//bar condenses to /foo/bar. This is a pure string
 * rewrite: unlike {@link File#getCanonicalPath()}, it never performs I/O
 * on the filesystem.
 *
 * @param path the path to condense
 * @return the condensed path
 */
private static String getCondensedPath(String path) {
  // Every match of the condensing pattern is replaced by one quoted separator.
  final String condensed =
      CONDENSED_PATH_REGEX.matcher(path).replaceAll(QUOTED_FILE_SEPARATOR);
  return condensed;
}
/** /**
* Get a path suffix. * Get a path suffix.
* *
* @param f The file to get the suffix for. * @param f The string to get the suffix for.
* @param prefix The prefix we're stripping off. * @param prefix The prefix we're stripping off.
* *
* @return A suffix such that prefix + suffix = path to f * @return A suffix such that prefix + suffix = f
*/ */
private static String getSuffix(File f, String prefix) { private static String getSuffix(String f, String prefix) {
String fullPath = getCondensedPath(f.getAbsolutePath()); if (f.startsWith(prefix)) {
if (fullPath.startsWith(prefix)) { return f.substring(prefix.length());
return fullPath.substring(prefix.length());
} }
throw new RuntimeException(prefix + " is not a prefix of " + fullPath); throw new RuntimeException(prefix + " is not a prefix of " + f);
} }
/** /**
@ -293,27 +277,27 @@ public interface FsVolumeSpi
* the block data and meta-data files. * the block data and meta-data files.
* *
* @param blockId the block ID * @param blockId the block ID
* @param blockFile the path to the block data file * @param basePath The full path to the directory the block is stored in
* @param metaFile the path to the block meta-data file * @param blockFile The block filename, with no path
* @param metaFile The meta filename, with no path. If blockFile is not null
* then the metaFile and blockFile should have the same
* prefix, with the meta file having a suffix like
* "_1234.meta". To save memory, if the blockFile is present
* we store only the meta file suffix in the object
* @param vol the volume that contains the block * @param vol the volume that contains the block
*/ */
public ScanInfo(long blockId, File blockFile, File metaFile, public ScanInfo(long blockId, File basePath, String blockFile,
FsVolumeSpi vol) { String metaFile, FsVolumeSpi vol) {
this.blockId = blockId; this.blockId = blockId;
String condensedVolPath = this.basePath = basePath;
(vol == null || vol.getBaseURI() == null) ? null : this.blockFile = blockFile;
getCondensedPath(new File(vol.getBaseURI()).getAbsolutePath()); if (blockFile != null && metaFile != null) {
this.blockSuffix = blockFile == null ? null : this.metaFile = getSuffix(metaFile, blockFile);
getSuffix(blockFile, condensedVolPath);
this.blockLength = (blockFile != null) ? blockFile.length() : 0;
if (metaFile == null) {
this.metaSuffix = null;
} else if (blockFile == null) {
this.metaSuffix = getSuffix(metaFile, condensedVolPath);
} else { } else {
this.metaSuffix = getSuffix(metaFile, this.metaFile = metaFile;
condensedVolPath + blockSuffix);
} }
this.blockLength = (blockFile != null) ?
new File(basePath, blockFile).length() : 0;
this.volume = vol; this.volume = vol;
this.fileRegion = null; this.fileRegion = null;
} }
@ -333,8 +317,9 @@ public interface FsVolumeSpi
this.blockLength = length; this.blockLength = length;
this.volume = vol; this.volume = vol;
this.fileRegion = fileRegion; this.fileRegion = fileRegion;
this.blockSuffix = null; this.basePath = null;
this.metaSuffix = null; this.blockFile = null;
this.metaFile = null;
} }
/** /**
@ -343,8 +328,8 @@ public interface FsVolumeSpi
* @return the block data file * @return the block data file
*/ */
public File getBlockFile() { public File getBlockFile() {
return (blockSuffix == null) ? null : return (blockFile == null) ? null :
new File(new File(volume.getBaseURI()).getAbsolutePath(), blockSuffix); new File(basePath.getAbsolutePath(), blockFile);
} }
/** /**
@ -363,15 +348,10 @@ public interface FsVolumeSpi
* @return the block meta data file * @return the block meta data file
*/ */
public File getMetaFile() { public File getMetaFile() {
if (metaSuffix == null) { if (metaFile == null) {
return null; return null;
} }
String fileSuffix = metaSuffix; return new File(basePath.getAbsolutePath(), fullMetaFile());
if (blockSuffix != null) {
fileSuffix = blockSuffix + metaSuffix;
}
return new File(new File(volume.getBaseURI()).getAbsolutePath(),
fileSuffix);
} }
/** /**
@ -414,14 +394,24 @@ public interface FsVolumeSpi
} }
public long getGenStamp() { public long getGenStamp() {
return metaSuffix != null ? Block.getGenerationStamp( return metaFile != null ? Block.getGenerationStamp(fullMetaFile())
getMetaFile().getName()) : : HdfsConstants.GRANDFATHER_GENERATION_STAMP;
HdfsConstants.GRANDFATHER_GENERATION_STAMP;
} }
public FileRegion getFileRegion() { public FileRegion getFileRegion() {
return fileRegion; return fileRegion;
} }
/**
 * Rebuild the complete meta file name (no path) from the stored fields.
 *
 * @return null when metaFile is null; metaFile unchanged when blockFile is
 *         null; otherwise blockFile concatenated with metaFile
 */
private String fullMetaFile() {
  if (metaFile == null) {
    return null;
  }
  // With no block file the stored value is used as-is; otherwise it is
  // appended to the block file name.
  return (blockFile == null) ? metaFile : blockFile + metaFile;
}
} }
/** /**

View File

@ -1451,7 +1451,7 @@ public class FsVolumeImpl implements FsVolumeSpi {
long blockId = Block.getBlockId(file.getName()); long blockId = Block.getBlockId(file.getName());
verifyFileLocation(file, bpFinalizedDir, verifyFileLocation(file, bpFinalizedDir,
blockId); blockId);
report.add(new ScanInfo(blockId, null, file, this)); report.add(new ScanInfo(blockId, dir, null, fileNames.get(i), this));
} }
continue; continue;
} }
@ -1474,7 +1474,8 @@ public class FsVolumeImpl implements FsVolumeSpi {
} }
} }
verifyFileLocation(blockFile, bpFinalizedDir, blockId); verifyFileLocation(blockFile, bpFinalizedDir, blockId);
report.add(new ScanInfo(blockId, blockFile, metaFile, this)); report.add(new ScanInfo(blockId, dir, blockFile.getName(),
metaFile == null ? null : metaFile.getName(), this));
} }
} }

View File

@ -1040,19 +1040,21 @@ public class TestDirectoryScanner {
private final static String BPID_2 = "BP-367845636-127.0.0.1-5895645674231"; private final static String BPID_2 = "BP-367845636-127.0.0.1-5895645674231";
void testScanInfoObject(long blockId, File blockFile, File metaFile) void testScanInfoObject(long blockId, File baseDir, String blockFile,
String metaFile)
throws Exception { throws Exception {
FsVolumeSpi.ScanInfo scanInfo = FsVolumeSpi.ScanInfo scanInfo =
new FsVolumeSpi.ScanInfo(blockId, blockFile, metaFile, TEST_VOLUME); new FsVolumeSpi.ScanInfo(blockId, baseDir, blockFile, metaFile,
TEST_VOLUME);
assertEquals(blockId, scanInfo.getBlockId()); assertEquals(blockId, scanInfo.getBlockId());
if (blockFile != null) { if (blockFile != null) {
assertEquals(blockFile.getAbsolutePath(), assertEquals(new File(baseDir, blockFile).getAbsolutePath(),
scanInfo.getBlockFile().getAbsolutePath()); scanInfo.getBlockFile().getAbsolutePath());
} else { } else {
assertNull(scanInfo.getBlockFile()); assertNull(scanInfo.getBlockFile());
} }
if (metaFile != null) { if (metaFile != null) {
assertEquals(metaFile.getAbsolutePath(), assertEquals(new File(baseDir, metaFile).getAbsolutePath(),
scanInfo.getMetaFile().getAbsolutePath()); scanInfo.getMetaFile().getAbsolutePath());
} else { } else {
assertNull(scanInfo.getMetaFile()); assertNull(scanInfo.getMetaFile());
@ -1062,7 +1064,7 @@ public class TestDirectoryScanner {
void testScanInfoObject(long blockId) throws Exception { void testScanInfoObject(long blockId) throws Exception {
FsVolumeSpi.ScanInfo scanInfo = FsVolumeSpi.ScanInfo scanInfo =
new FsVolumeSpi.ScanInfo(blockId, null, null, null); new FsVolumeSpi.ScanInfo(blockId, null, null, null, null);
assertEquals(blockId, scanInfo.getBlockId()); assertEquals(blockId, scanInfo.getBlockId());
assertNull(scanInfo.getBlockFile()); assertNull(scanInfo.getBlockFile());
assertNull(scanInfo.getMetaFile()); assertNull(scanInfo.getMetaFile());
@ -1071,24 +1073,19 @@ public class TestDirectoryScanner {
@Test(timeout = 120000) @Test(timeout = 120000)
public void TestScanInfo() throws Exception { public void TestScanInfo() throws Exception {
testScanInfoObject(123, testScanInfoObject(123,
new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(), new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()),
"blk_123"), "blk_123", "blk_123__1001.meta");
new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
"blk_123__1001.meta"));
testScanInfoObject(464, testScanInfoObject(464,
new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(), new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()),
"blk_123"), "blk_123", null);
null); testScanInfoObject(523,
testScanInfoObject(523, null, new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()),
new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(), null, "blk_123__1009.meta");
"blk_123__1009.meta")); testScanInfoObject(789, null, null, null);
testScanInfoObject(789, null, null);
testScanInfoObject(456); testScanInfoObject(456);
testScanInfoObject(123, testScanInfoObject(123,
new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(), new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath()),
"blk_567"), "blk_567", "blk_567__1004.meta");
new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(),
"blk_567__1004.meta"));
} }
/** /**

View File

@ -1786,8 +1786,8 @@ public class TestFsDatasetImpl {
assertFalse(metaFile.exists()); assertFalse(metaFile.exists());
FsVolumeSpi.ScanInfo info = new FsVolumeSpi.ScanInfo( FsVolumeSpi.ScanInfo info = new FsVolumeSpi.ScanInfo(
replicaInfo.getBlockId(), blockFile.getAbsoluteFile(), replicaInfo.getBlockId(), blockFile.getParentFile().getAbsoluteFile(),
metaFile.getAbsoluteFile(), replicaInfo.getVolume()); blockFile.getName(), metaFile.getName(), replicaInfo.getVolume());
fsdataset.checkAndUpdate(bpid, info); fsdataset.checkAndUpdate(bpid, info);
BlockManager blockManager = cluster.getNameNode(). BlockManager blockManager = cluster.getNameNode().