HDFS-4461. DirectoryScanner: volume prefix takes up memory for every block that is scanned (Colin Patrick McCabe)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1494401 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ab647cfcdb
commit
57006e1c8c
|
@ -290,6 +290,9 @@ Release 2.1.0-beta - UNRELEASED
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
|
|
||||||
|
HDFS-4461. DirectoryScanner: volume path prefix takes up memory for every
|
||||||
|
block that is scanned (Colin Patrick McCabe)
|
||||||
|
|
||||||
HDFS-4222. NN is unresponsive and loses heartbeats from DNs when
|
HDFS-4222. NN is unresponsive and loses heartbeats from DNs when
|
||||||
configured to use LDAP and LDAP has issues. (Xiaobo Peng, suresh)
|
configured to use LDAP and LDAP has issues. (Xiaobo Peng, suresh)
|
||||||
|
|
||||||
|
|
|
@ -33,6 +33,8 @@ import java.util.concurrent.Future;
|
||||||
import java.util.concurrent.ScheduledExecutorService;
|
import java.util.concurrent.ScheduledExecutorService;
|
||||||
import java.util.concurrent.ScheduledThreadPoolExecutor;
|
import java.util.concurrent.ScheduledThreadPoolExecutor;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
@ -154,30 +156,109 @@ public class DirectoryScanner implements Runnable {
|
||||||
* Tracks the files and other information related to a block on the disk
|
* Tracks the files and other information related to a block on the disk
|
||||||
* Missing file is indicated by setting the corresponding member
|
* Missing file is indicated by setting the corresponding member
|
||||||
* to null.
|
* to null.
|
||||||
|
*
|
||||||
|
* Because millions of these structures may be created, we try to save
|
||||||
|
* memory here. So instead of storing full paths, we store path suffixes.
|
||||||
|
* The block file, if it exists, will have a path like this:
|
||||||
|
* <volume_base_path>/<block_path>
|
||||||
|
* So we don't need to store the volume path, since we already know what the
|
||||||
|
* volume is.
|
||||||
|
*
|
||||||
|
* The metadata file, if it exists, will have a path like this:
|
||||||
|
* <volume_base_path>/<block_path>_<genstamp>.meta
|
||||||
|
* So if we have a block file, there isn't any need to store the block path
|
||||||
|
* again.
|
||||||
|
*
|
||||||
|
* The accessor functions take care of these manipulations.
|
||||||
*/
|
*/
|
||||||
static class ScanInfo implements Comparable<ScanInfo> {
|
static class ScanInfo implements Comparable<ScanInfo> {
|
||||||
private final long blockId;
|
private final long blockId;
|
||||||
private final File metaFile;
|
|
||||||
private final File blockFile;
|
/**
|
||||||
|
* The block file path, relative to the volume's base directory.
|
||||||
|
* If no block file was found, this is null. If 'vol'
|
||||||
|
* is null, then this is the full path of the block file.
|
||||||
|
*/
|
||||||
|
private final String blockSuffix;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The suffix of the meta file path relative to the block file.
|
||||||
|
* If blockSuffix is null, then this will be the entire path relative
|
||||||
|
* to the volume base directory, or an absolute path if vol is also
|
||||||
|
* null.
|
||||||
|
*/
|
||||||
|
private final String metaSuffix;
|
||||||
|
|
||||||
private final FsVolumeSpi volume;
|
private final FsVolumeSpi volume;
|
||||||
|
|
||||||
|
private final static Pattern CONDENSED_PATH_REGEX =
|
||||||
|
Pattern.compile("(?<!^)(\\\\|/){2,}");
|
||||||
|
|
||||||
|
private final static String QUOTED_FILE_SEPARATOR =
|
||||||
|
Matcher.quoteReplacement(File.separator);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the most condensed version of the path.
|
||||||
|
*
|
||||||
|
* For example, the condensed version of /foo//bar is /foo/bar.
|
||||||
|
* Unlike {@link File#getCanonicalPath()}, this will never perform I/O
|
||||||
|
* on the filesystem.
|
||||||
|
*/
|
||||||
|
private static String getCondensedPath(String path) {
|
||||||
|
return CONDENSED_PATH_REGEX.matcher(path).
|
||||||
|
replaceAll(QUOTED_FILE_SEPARATOR);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a path suffix.
|
||||||
|
*
|
||||||
|
* @param f The file to get the suffix for.
|
||||||
|
* @param prefix The prefix we're stripping off.
|
||||||
|
*
|
||||||
|
* @return A suffix such that prefix + suffix = the condensed absolute path of f
|
||||||
|
*/
|
||||||
|
private static String getSuffix(File f, String prefix) {
|
||||||
|
String fullPath = getCondensedPath(f.getAbsolutePath());
|
||||||
|
if (fullPath.startsWith(prefix)) {
|
||||||
|
return fullPath.substring(prefix.length());
|
||||||
|
}
|
||||||
|
throw new RuntimeException(prefix + " is not a prefix of " + fullPath);
|
||||||
|
}
|
||||||
|
|
||||||
ScanInfo(long blockId) {
|
ScanInfo(long blockId) {
|
||||||
this(blockId, null, null, null);
|
this(blockId, null, null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
ScanInfo(long blockId, File blockFile, File metaFile, FsVolumeSpi vol) {
|
ScanInfo(long blockId, File blockFile, File metaFile, FsVolumeSpi vol) {
|
||||||
this.blockId = blockId;
|
this.blockId = blockId;
|
||||||
this.metaFile = metaFile;
|
String condensedVolPath = vol == null ? null :
|
||||||
this.blockFile = blockFile;
|
getCondensedPath(vol.getBasePath());
|
||||||
|
this.blockSuffix = blockFile == null ? null :
|
||||||
|
getSuffix(blockFile, condensedVolPath);
|
||||||
|
if (metaFile == null) {
|
||||||
|
this.metaSuffix = null;
|
||||||
|
} else if (blockFile == null) {
|
||||||
|
this.metaSuffix = getSuffix(metaFile, condensedVolPath);
|
||||||
|
} else {
|
||||||
|
this.metaSuffix = getSuffix(metaFile,
|
||||||
|
condensedVolPath + blockSuffix);
|
||||||
|
}
|
||||||
this.volume = vol;
|
this.volume = vol;
|
||||||
}
|
}
|
||||||
|
|
||||||
File getMetaFile() {
|
File getBlockFile() {
|
||||||
return metaFile;
|
return (blockSuffix == null) ? null :
|
||||||
|
new File(volume.getBasePath(), blockSuffix);
|
||||||
}
|
}
|
||||||
|
|
||||||
File getBlockFile() {
|
File getMetaFile() {
|
||||||
return blockFile;
|
if (metaSuffix == null) {
|
||||||
|
return null;
|
||||||
|
} else if (blockSuffix == null) {
|
||||||
|
return new File(volume.getBasePath(), metaSuffix);
|
||||||
|
} else {
|
||||||
|
return new File(volume.getBasePath(), blockSuffix + metaSuffix);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
long getBlockId() {
|
long getBlockId() {
|
||||||
|
@ -216,7 +297,8 @@ public class DirectoryScanner implements Runnable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public long getGenStamp() {
|
public long getGenStamp() {
|
||||||
return metaFile != null ? Block.getGenerationStamp(metaFile.getName()) :
|
return metaSuffix != null ? Block.getGenerationStamp(
|
||||||
|
getMetaFile().getName()) :
|
||||||
GenerationStamp.GRANDFATHER_GENERATION_STAMP;
|
GenerationStamp.GRANDFATHER_GENERATION_STAMP;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,6 +30,9 @@ public interface FsVolumeSpi {
|
||||||
/** @return the available storage space in bytes. */
|
/** @return the available storage space in bytes. */
|
||||||
public long getAvailable() throws IOException;
|
public long getAvailable() throws IOException;
|
||||||
|
|
||||||
|
/** @return the base path to the volume */
|
||||||
|
public String getBasePath();
|
||||||
|
|
||||||
/** @return the path to the volume */
|
/** @return the path to the volume */
|
||||||
public String getPath(String bpid) throws IOException;
|
public String getPath(String bpid) throws IOException;
|
||||||
|
|
||||||
|
|
|
@ -124,6 +124,11 @@ class FsVolumeImpl implements FsVolumeSpi {
|
||||||
return bp;
|
return bp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getBasePath() {
|
||||||
|
return currentDir.getParent();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getPath(String bpid) throws IOException {
|
public String getPath(String bpid) throws IOException {
|
||||||
return getBlockPoolSlice(bpid).getDirectory().getAbsolutePath();
|
return getBlockPoolSlice(bpid).getDirectory().getAbsolutePath();
|
||||||
|
|
|
@ -380,4 +380,93 @@ public class TestDirectoryScanner {
|
||||||
assertNotNull(memBlock);
|
assertNotNull(memBlock);
|
||||||
assertEquals(genStamp, memBlock.getGenerationStamp());
|
assertEquals(genStamp, memBlock.getGenerationStamp());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static class TestFsVolumeSpi implements FsVolumeSpi {
|
||||||
|
@Override
|
||||||
|
public String[] getBlockPoolList() {
|
||||||
|
return new String[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getAvailable() throws IOException {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getBasePath() {
|
||||||
|
return "/base";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getPath(String bpid) throws IOException {
|
||||||
|
return "/base/current/" + bpid;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public File getFinalizedDir(String bpid) throws IOException {
|
||||||
|
return new File("/base/current/" + bpid + "/finalized");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final static TestFsVolumeSpi TEST_VOLUME = new TestFsVolumeSpi();
|
||||||
|
|
||||||
|
private final static String BPID_1 = "BP-783049782-127.0.0.1-1370971773491";
|
||||||
|
|
||||||
|
private final static String BPID_2 = "BP-367845636-127.0.0.1-5895645674231";
|
||||||
|
|
||||||
|
void testScanInfoObject(long blockId, File blockFile, File metaFile)
|
||||||
|
throws Exception {
|
||||||
|
assertEquals("/base/current/" + BPID_1 + "/finalized",
|
||||||
|
TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath());
|
||||||
|
DirectoryScanner.ScanInfo scanInfo =
|
||||||
|
new DirectoryScanner.ScanInfo(blockId, blockFile, metaFile, TEST_VOLUME);
|
||||||
|
assertEquals(blockId, scanInfo.getBlockId());
|
||||||
|
if (blockFile != null) {
|
||||||
|
assertEquals(blockFile.getAbsolutePath(),
|
||||||
|
scanInfo.getBlockFile().getAbsolutePath());
|
||||||
|
} else {
|
||||||
|
assertNull(scanInfo.getBlockFile());
|
||||||
|
}
|
||||||
|
if (metaFile != null) {
|
||||||
|
assertEquals(metaFile.getAbsolutePath(),
|
||||||
|
scanInfo.getMetaFile().getAbsolutePath());
|
||||||
|
} else {
|
||||||
|
assertNull(scanInfo.getMetaFile());
|
||||||
|
}
|
||||||
|
assertEquals(TEST_VOLUME, scanInfo.getVolume());
|
||||||
|
}
|
||||||
|
|
||||||
|
void testScanInfoObject(long blockId) throws Exception {
|
||||||
|
DirectoryScanner.ScanInfo scanInfo =
|
||||||
|
new DirectoryScanner.ScanInfo(blockId);
|
||||||
|
assertEquals(blockId, scanInfo.getBlockId());
|
||||||
|
assertNull(scanInfo.getBlockFile());
|
||||||
|
assertNull(scanInfo.getMetaFile());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(timeout=120000)
|
||||||
|
public void TestScanInfo() throws Exception {
|
||||||
|
testScanInfoObject(123,
|
||||||
|
new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
|
||||||
|
"blk_123"),
|
||||||
|
new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
|
||||||
|
"blk_123__1001.meta"));
|
||||||
|
testScanInfoObject(464,
|
||||||
|
new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
|
||||||
|
"blk_123"),
|
||||||
|
null);
|
||||||
|
testScanInfoObject(523,
|
||||||
|
null,
|
||||||
|
new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
|
||||||
|
"blk_123__1009.meta"));
|
||||||
|
testScanInfoObject(789,
|
||||||
|
null,
|
||||||
|
null);
|
||||||
|
testScanInfoObject(456);
|
||||||
|
testScanInfoObject(123,
|
||||||
|
new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(),
|
||||||
|
"blk_567"),
|
||||||
|
new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(),
|
||||||
|
"blk_567__1004.meta"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue