HDFS-15621. Datanode DirectoryScanner uses excessive memory (#2849). Contributed by Stephen O'Donnell

This commit is contained in:
Stephen O'Donnell 2021-04-26 11:00:23 +01:00 committed by GitHub
parent b968fa0957
commit 605ed85c29
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 77 additions and 89 deletions

View File

@ -582,7 +582,7 @@ public class DirectoryScanner implements Runnable {
long blockId, FsVolumeSpi vol) {
statsRecord.missingBlockFile++;
statsRecord.missingMetaFile++;
diffRecord.add(new ScanInfo(blockId, null, null, vol));
diffRecord.add(new ScanInfo(blockId, null, null, null, vol));
}
/**

View File

@ -227,27 +227,27 @@ public interface FsVolumeSpi
*/
public static class ScanInfo implements Comparable<ScanInfo> {
private final long blockId;
/**
* The block file path, relative to the volume's base directory.
* If there was no block file found, this may be null. If 'vol'
* is null, then this is the full path of the block file.
* The full path to the folder containing the block / meta files.
*/
private final String blockSuffix;
private final File basePath;
/**
* The suffix of the meta file path relative to the block file.
* If blockSuffix is null, then this will be the entire path relative
* to the volume base directory, or an absolute path if vol is also
* null.
* The block file name, with no path
*/
private final String metaSuffix;
private final String blockFile;
/**
* Holds the meta file name, with no path, only if blockFile is null.
* If blockFile is not null, the meta file will be named identically to
* the blockFile, but with a suffix like "_1234.meta". If the blockFile
* is present, we store only the meta file suffix.
*/
private final String metaFile;
private final FsVolumeSpi volume;
private final FileRegion fileRegion;
/**
* Get the file's length in async block scan
* Get the file's length in async block scan.
*/
private final long blockLength;
@ -257,35 +257,19 @@ public interface FsVolumeSpi
private final static String QUOTED_FILE_SEPARATOR =
Matcher.quoteReplacement(File.separator);
/**
* Get the most condensed version of the path.
*
* For example, the condensed version of /foo//bar is /foo/bar
* Unlike {@link File#getCanonicalPath()}, this will never perform I/O
* on the filesystem.
*
* @param path the path to condense
* @return the condensed path
*/
private static String getCondensedPath(String path) {
return CONDENSED_PATH_REGEX.matcher(path).
replaceAll(QUOTED_FILE_SEPARATOR);
}
/**
* Get a path suffix.
*
* @param f The file to get the suffix for.
* @param f The string to get the suffix for.
* @param prefix The prefix we're stripping off.
*
* @return A suffix such that prefix + suffix = path to f
* @return A suffix such that prefix + suffix = f
*/
private static String getSuffix(File f, String prefix) {
String fullPath = getCondensedPath(f.getAbsolutePath());
if (fullPath.startsWith(prefix)) {
return fullPath.substring(prefix.length());
private static String getSuffix(String f, String prefix) {
if (f.startsWith(prefix)) {
return f.substring(prefix.length());
}
throw new RuntimeException(prefix + " is not a prefix of " + fullPath);
throw new RuntimeException(prefix + " is not a prefix of " + f);
}
/**
@ -293,27 +277,27 @@ public interface FsVolumeSpi
* the block data and meta-data files.
*
* @param blockId the block ID
* @param blockFile the path to the block data file
* @param metaFile the path to the block meta-data file
* @param basePath The full path to the directory the block is stored in
* @param blockFile The block filename, with no path
* @param metaFile The meta filename, with no path. If blockFile is not null
* then the metaFile and blockFile should have the same
* prefix, with the meta file having a suffix like
* "_1234.meta". To save memory, if the blockFile is present
* we store only the meta file suffix in the object
* @param vol the volume that contains the block
*/
public ScanInfo(long blockId, File blockFile, File metaFile,
FsVolumeSpi vol) {
public ScanInfo(long blockId, File basePath, String blockFile,
String metaFile, FsVolumeSpi vol) {
this.blockId = blockId;
String condensedVolPath =
(vol == null || vol.getBaseURI() == null) ? null :
getCondensedPath(new File(vol.getBaseURI()).getAbsolutePath());
this.blockSuffix = blockFile == null ? null :
getSuffix(blockFile, condensedVolPath);
this.blockLength = (blockFile != null) ? blockFile.length() : 0;
if (metaFile == null) {
this.metaSuffix = null;
} else if (blockFile == null) {
this.metaSuffix = getSuffix(metaFile, condensedVolPath);
this.basePath = basePath;
this.blockFile = blockFile;
if (blockFile != null && metaFile != null) {
this.metaFile = getSuffix(metaFile, blockFile);
} else {
this.metaSuffix = getSuffix(metaFile,
condensedVolPath + blockSuffix);
this.metaFile = metaFile;
}
this.blockLength = (blockFile != null) ?
new File(basePath, blockFile).length() : 0;
this.volume = vol;
this.fileRegion = null;
}
@ -333,8 +317,9 @@ public interface FsVolumeSpi
this.blockLength = length;
this.volume = vol;
this.fileRegion = fileRegion;
this.blockSuffix = null;
this.metaSuffix = null;
this.basePath = null;
this.blockFile = null;
this.metaFile = null;
}
/**
@ -343,8 +328,8 @@ public interface FsVolumeSpi
* @return the block data file
*/
public File getBlockFile() {
return (blockSuffix == null) ? null :
new File(new File(volume.getBaseURI()).getAbsolutePath(), blockSuffix);
return (blockFile == null) ? null :
new File(basePath.getAbsolutePath(), blockFile);
}
/**
@ -363,15 +348,10 @@ public interface FsVolumeSpi
* @return the block meta data file
*/
public File getMetaFile() {
if (metaSuffix == null) {
if (metaFile == null) {
return null;
}
String fileSuffix = metaSuffix;
if (blockSuffix != null) {
fileSuffix = blockSuffix + metaSuffix;
}
return new File(new File(volume.getBaseURI()).getAbsolutePath(),
fileSuffix);
return new File(basePath.getAbsolutePath(), fullMetaFile());
}
/**
@ -414,14 +394,24 @@ public interface FsVolumeSpi
}
public long getGenStamp() {
return metaSuffix != null ? Block.getGenerationStamp(
getMetaFile().getName()) :
HdfsConstants.GRANDFATHER_GENERATION_STAMP;
return metaFile != null ? Block.getGenerationStamp(fullMetaFile())
: HdfsConstants.GRANDFATHER_GENERATION_STAMP;
}
public FileRegion getFileRegion() {
return fileRegion;
}
private String fullMetaFile() {
if (metaFile == null) {
return null;
}
if (blockFile == null) {
return metaFile;
} else {
return blockFile + metaFile;
}
}
}
/**

View File

@ -1451,7 +1451,7 @@ public class FsVolumeImpl implements FsVolumeSpi {
long blockId = Block.getBlockId(file.getName());
verifyFileLocation(file, bpFinalizedDir,
blockId);
report.add(new ScanInfo(blockId, null, file, this));
report.add(new ScanInfo(blockId, dir, null, fileNames.get(i), this));
}
continue;
}
@ -1474,7 +1474,8 @@ public class FsVolumeImpl implements FsVolumeSpi {
}
}
verifyFileLocation(blockFile, bpFinalizedDir, blockId);
report.add(new ScanInfo(blockId, blockFile, metaFile, this));
report.add(new ScanInfo(blockId, dir, blockFile.getName(),
metaFile == null ? null : metaFile.getName(), this));
}
}

View File

@ -1040,19 +1040,21 @@ public class TestDirectoryScanner {
private final static String BPID_2 = "BP-367845636-127.0.0.1-5895645674231";
void testScanInfoObject(long blockId, File blockFile, File metaFile)
void testScanInfoObject(long blockId, File baseDir, String blockFile,
String metaFile)
throws Exception {
FsVolumeSpi.ScanInfo scanInfo =
new FsVolumeSpi.ScanInfo(blockId, blockFile, metaFile, TEST_VOLUME);
new FsVolumeSpi.ScanInfo(blockId, baseDir, blockFile, metaFile,
TEST_VOLUME);
assertEquals(blockId, scanInfo.getBlockId());
if (blockFile != null) {
assertEquals(blockFile.getAbsolutePath(),
assertEquals(new File(baseDir, blockFile).getAbsolutePath(),
scanInfo.getBlockFile().getAbsolutePath());
} else {
assertNull(scanInfo.getBlockFile());
}
if (metaFile != null) {
assertEquals(metaFile.getAbsolutePath(),
assertEquals(new File(baseDir, metaFile).getAbsolutePath(),
scanInfo.getMetaFile().getAbsolutePath());
} else {
assertNull(scanInfo.getMetaFile());
@ -1062,7 +1064,7 @@ public class TestDirectoryScanner {
void testScanInfoObject(long blockId) throws Exception {
FsVolumeSpi.ScanInfo scanInfo =
new FsVolumeSpi.ScanInfo(blockId, null, null, null);
new FsVolumeSpi.ScanInfo(blockId, null, null, null, null);
assertEquals(blockId, scanInfo.getBlockId());
assertNull(scanInfo.getBlockFile());
assertNull(scanInfo.getMetaFile());
@ -1071,24 +1073,19 @@ public class TestDirectoryScanner {
@Test(timeout = 120000)
public void TestScanInfo() throws Exception {
testScanInfoObject(123,
new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
"blk_123"),
new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
"blk_123__1001.meta"));
new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()),
"blk_123", "blk_123__1001.meta");
testScanInfoObject(464,
new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
"blk_123"),
null);
testScanInfoObject(523, null,
new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
"blk_123__1009.meta"));
testScanInfoObject(789, null, null);
new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()),
"blk_123", null);
testScanInfoObject(523,
new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()),
null, "blk_123__1009.meta");
testScanInfoObject(789, null, null, null);
testScanInfoObject(456);
testScanInfoObject(123,
new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(),
"blk_567"),
new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(),
"blk_567__1004.meta"));
new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath()),
"blk_567", "blk_567__1004.meta");
}
/**

View File

@ -1786,8 +1786,8 @@ public class TestFsDatasetImpl {
assertFalse(metaFile.exists());
FsVolumeSpi.ScanInfo info = new FsVolumeSpi.ScanInfo(
replicaInfo.getBlockId(), blockFile.getAbsoluteFile(),
metaFile.getAbsoluteFile(), replicaInfo.getVolume());
replicaInfo.getBlockId(), blockFile.getParentFile().getAbsoluteFile(),
blockFile.getName(), metaFile.getName(), replicaInfo.getVolume());
fsdataset.checkAndUpdate(bpid, info);
BlockManager blockManager = cluster.getNameNode().