HDFS-14401. Refine the implementation for HDFS cache on SCM. Contributed by Feilong He.
parent b00f9d89e4
commit 3b2a7aabf7

DFSConfigKeys.java

@@ -25,8 +25,6 @@ import org.apache.hadoop.hdfs.net.DFSNetworkTopology;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyRackFaultTolerant;
-import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.MappableBlockLoader;
-import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.MemoryMappableBlockLoader;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.RamDiskReplicaLruTracker;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.ReservedSpaceCalculator;
 import org.apache.hadoop.hdfs.web.URLConnectionFactory;
@@ -382,22 +380,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String DFS_DATANODE_CACHE_REVOCATION_POLLING_MS = "dfs.datanode.cache.revocation.polling.ms";
   public static final long DFS_DATANODE_CACHE_REVOCATION_POLLING_MS_DEFAULT = 500L;

-  // Currently, the available cache loaders are MemoryMappableBlockLoader,
-  // PmemMappableBlockLoader. MemoryMappableBlockLoader is the default cache
-  // loader to cache block replica to memory.
-  public static final String DFS_DATANODE_CACHE_LOADER_CLASS =
-      "dfs.datanode.cache.loader.class";
-  public static final Class<? extends MappableBlockLoader>
-      DFS_DATANODE_CACHE_LOADER_CLASS_DEFAULT =
-          MemoryMappableBlockLoader.class;
   // Multiple dirs separated by "," are acceptable.
   public static final String DFS_DATANODE_CACHE_PMEM_DIRS_KEY =
       "dfs.datanode.cache.pmem.dirs";
   public static final String DFS_DATANODE_CACHE_PMEM_DIRS_DEFAULT = "";
-  // The cache capacity of persistent memory
-  public static final String DFS_DATANODE_CACHE_PMEM_CAPACITY_KEY =
-      "dfs.datanode.cache.pmem.capacity";
-  public static final long DFS_DATANODE_CACHE_PMEM_CAPACITY_DEFAULT = 0L;

   public static final String DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY = "dfs.namenode.datanode.registration.ip-hostname-check";
   public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT = true;

DNConf.java

@@ -27,10 +27,6 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_SPLIT_THRESHO
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_SPLIT_THRESHOLD_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_KEY;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_CACHE_LOADER_CLASS;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_CACHE_LOADER_CLASS_DEFAULT;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_CACHE_PMEM_CAPACITY_DEFAULT;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_CACHE_PMEM_CAPACITY_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_CACHE_PMEM_DIRS_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_LIFELINE_INTERVAL_SECONDS_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_NON_LOCAL_LAZY_PERSIST;
@@ -71,7 +67,6 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.datatransfer.TrustedChannelResolver;
 import org.apache.hadoop.hdfs.protocol.datatransfer.sasl.DataTransferSaslUtil;
 import org.apache.hadoop.hdfs.server.common.Util;
-import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.MappableBlockLoader;
 import org.apache.hadoop.security.SaslPropertiesResolver;

 import java.util.concurrent.TimeUnit;
@@ -121,9 +116,7 @@ public class DNConf {
   final long xceiverStopTimeout;
   final long restartReplicaExpiry;

-  private final Class<? extends MappableBlockLoader> cacheLoaderClass;
   final long maxLockedMemory;
-  private final long maxLockedPmem;
   private final String[] pmemDirs;

   private final long bpReadyTimeout;
@@ -266,17 +259,10 @@ public class DNConf {
         DFS_DATANODE_XCEIVER_STOP_TIMEOUT_MILLIS_KEY,
         DFS_DATANODE_XCEIVER_STOP_TIMEOUT_MILLIS_DEFAULT);

-    this.cacheLoaderClass = getConf().getClass(DFS_DATANODE_CACHE_LOADER_CLASS,
-        DFS_DATANODE_CACHE_LOADER_CLASS_DEFAULT, MappableBlockLoader.class);
-
     this.maxLockedMemory = getConf().getLong(
         DFS_DATANODE_MAX_LOCKED_MEMORY_KEY,
         DFS_DATANODE_MAX_LOCKED_MEMORY_DEFAULT);

-    this.maxLockedPmem = getConf().getLong(
-        DFS_DATANODE_CACHE_PMEM_CAPACITY_KEY,
-        DFS_DATANODE_CACHE_PMEM_CAPACITY_DEFAULT);
-
     this.pmemDirs = getConf().getTrimmedStrings(
         DFS_DATANODE_CACHE_PMEM_DIRS_KEY);

@@ -342,10 +328,6 @@ public class DNConf {
     return maxLockedMemory;
   }

-  public long getMaxLockedPmem() {
-    return maxLockedPmem;
-  }
-
   /**
    * Returns true if connect to datanode via hostname
    *
@@ -449,10 +431,6 @@ public class DNConf {
     return maxDataLength;
   }

-  public Class<? extends MappableBlockLoader> getCacheLoaderClass() {
-    return cacheLoaderClass;
-  }
-
   public String[] getPmemVolumes() {
     return pmemDirs;
   }

FsDatasetCache.java

@@ -53,7 +53,6 @@ import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.server.datanode.DNConf;
 import org.apache.hadoop.hdfs.server.datanode.DatanodeUtil;
-import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.Time;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -183,9 +182,8 @@ public class FsDatasetCache {
     this.memCacheStats = new MemoryCacheStats(
         dataset.datanode.getDnConf().getMaxLockedMemory());

-    Class<? extends MappableBlockLoader> cacheLoaderClass =
-        dataset.datanode.getDnConf().getCacheLoaderClass();
-    this.cacheLoader = ReflectionUtils.newInstance(cacheLoaderClass, null);
+    this.cacheLoader = MappableBlockLoaderFactory.createCacheLoader(
+        this.getDnConf());
     cacheLoader.initialize(this);
   }

@@ -213,7 +211,7 @@ public class FsDatasetCache {
       return null;
     }
     ExtendedBlockId key = new ExtendedBlockId(blockId, bpid);
-    return cacheLoader.getCachedPath(key);
+    return PmemVolumeManager.getInstance().getCachePath(key);
   }

   /**
@@ -380,14 +378,13 @@ public class FsDatasetCache {
       MappableBlock mappableBlock = null;
       ExtendedBlock extBlk = new ExtendedBlock(key.getBlockPoolId(),
           key.getBlockId(), length, genstamp);
-      long newUsedBytes = cacheLoader.reserve(length);
+      long newUsedBytes = cacheLoader.reserve(key, length);
       boolean reservedBytes = false;
       try {
         if (newUsedBytes < 0) {
-          LOG.warn("Failed to cache " + key + ": could not reserve " + length +
-              " more bytes in the cache: " +
-              cacheLoader.getCacheCapacityConfigKey() +
-              " of " + cacheLoader.getCacheCapacity() + " exceeded.");
+          LOG.warn("Failed to cache " + key + ": could not reserve " +
+              "more bytes in the cache: " + cacheLoader.getCacheCapacity() +
+              " exceeded when try to reserve " + length + "bytes.");
           return;
         }
         reservedBytes = true;
@@ -442,10 +439,10 @@ public class FsDatasetCache {
         IOUtils.closeQuietly(metaIn);
         if (!success) {
           if (reservedBytes) {
-            cacheLoader.release(length);
+            cacheLoader.release(key, length);
           }
           LOG.debug("Caching of {} was aborted. We are now caching only {} "
-              + "bytes in total.", key, memCacheStats.getCacheUsed());
+              + "bytes in total.", key, cacheLoader.getCacheUsed());
           IOUtils.closeQuietly(mappableBlock);
           numBlocksFailedToCache.incrementAndGet();

@@ -519,7 +516,8 @@ public class FsDatasetCache {
       synchronized (FsDatasetCache.this) {
         mappableBlockMap.remove(key);
       }
-      long newUsedBytes = cacheLoader.release(value.mappableBlock.getLength());
+      long newUsedBytes = cacheLoader.
+          release(key, value.mappableBlock.getLength());
       numBlocksCached.addAndGet(-1);
       dataset.datanode.getMetrics().incrBlocksUncached(1);
       if (revocationTimeMs != 0) {
@@ -592,4 +590,11 @@ public class FsDatasetCache {
   MappableBlockLoader getCacheLoader() {
     return cacheLoader;
   }
+
+  /**
+   * This method can be executed during DataNode shutdown.
+   */
+  void shutdown() {
+    cacheLoader.shutdown();
+  }
 }
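
The FsDatasetCache changes above keep the existing reserve-first, roll-back-on-failure caching flow, but route the byte accounting through the key-aware reserve(key, length) and release(key, length) calls and obtain the loader from MappableBlockLoaderFactory instead of reflection. A minimal standalone sketch of that reserve/rollback pattern, with placeholder names, capacity, and sizes (not the actual Hadoop classes):

// Sketch of the reserve -> map -> release-on-failure flow used by the caching
// task in FsDatasetCache. All names and numbers here are illustrative.
import java.util.concurrent.atomic.AtomicLong;

public class ReserveReleaseSketch {
  static final long CAPACITY = 1024;
  static final AtomicLong used = new AtomicLong(0);

  // Returns the new used-bytes value, or -1 if the reservation would overflow.
  static long reserve(long bytes) {
    while (true) {
      long cur = used.get();
      long next = cur + bytes;
      if (next > CAPACITY) {
        return -1;
      }
      if (used.compareAndSet(cur, next)) {
        return next;
      }
    }
  }

  static void cacheBlock(long length) {
    boolean success = false;
    long newUsed = reserve(length);
    if (newUsed < 0) {
      System.out.println("could not reserve " + length + " bytes");
      return;
    }
    try {
      // ... map and checksum the replica here ...
      success = true;
    } finally {
      if (!success) {
        used.addAndGet(-length);  // roll back the reservation on failure
      }
    }
  }

  public static void main(String[] args) {
    cacheBlock(512);
    cacheBlock(600);  // fails: capacity exceeded
    System.out.println("used=" + used.get());
  }
}

The compare-and-set loop mirrors the UsedBytesCount-style accounting that appears later in this diff: a reservation only succeeds if it still fits under the capacity at the moment of the CAS.
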

FsDatasetImpl.java

@@ -2340,6 +2340,8 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
             "from LazyWriter.join");
       }
     }
+
+    cacheManager.shutdown();
   }

   @Override // FSDatasetMBean

MappableBlockLoader.java

@@ -65,26 +65,25 @@ public abstract class MappableBlockLoader {
   /**
    * Try to reserve some given bytes.
    *
+   * @param key The ExtendedBlockId for a block.
+   *
    * @param bytesCount The number of bytes to add.
    *
    * @return The new number of usedBytes if we succeeded;
    *         -1 if we failed.
    */
-  abstract long reserve(long bytesCount);
+  abstract long reserve(ExtendedBlockId key, long bytesCount);

   /**
    * Release some bytes that we're using.
    *
+   * @param key The ExtendedBlockId for a block.
+   *
    * @param bytesCount The number of bytes to release.
    *
    * @return The new number of usedBytes.
    */
-  abstract long release(long bytesCount);
+  abstract long release(ExtendedBlockId key, long bytesCount);

-  /**
-   * Get the config key of cache capacity.
-   */
-  abstract String getCacheCapacityConfigKey();
-
   /**
    * Get the approximate amount of cache space used.
@@ -102,9 +101,11 @@ public abstract class MappableBlockLoader {
   abstract boolean isTransientCache();

   /**
-   * Get a cache file path if applicable. Otherwise return null.
+   * Clean up cache, can be used during DataNode shutdown.
    */
-  abstract String getCachedPath(ExtendedBlockId key);
+  void shutdown() {
+    // Do nothing.
+  }

   /**
    * Reads bytes into a buffer until EOF or the buffer's limit is reached.
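
The new reserve(key, bytesCount) and release(key, bytesCount) signatures let a loader remember which backing volume a block was charged against and credit the same volume when the block is uncached. A hedged sketch of that bookkeeping, with a plain map standing in for PmemVolumeManager's blockKeyToVolume and hypothetical volume capacities:

// Illustrative bookkeeping behind the key-based reserve/release contract:
// the key is recorded on reserve and consulted again on release.
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

public class KeyedAccountingSketch {
  private final long[] used = new long[2];           // used bytes per volume
  private final long[] capacity = {64, 64};          // hypothetical capacities
  private final Map<String, Integer> keyToVolume = new ConcurrentHashMap<>();

  synchronized long reserve(String key, long bytes) {
    for (int v = 0; v < capacity.length; v++) {      // first-fit for brevity
      if (used[v] + bytes <= capacity[v]) {
        used[v] += bytes;
        keyToVolume.put(key, v);                     // remember where it went
        return used[v];
      }
    }
    return -1;                                       // no volume could hold it
  }

  synchronized long release(String key, long bytes) {
    int v = keyToVolume.remove(key);                 // credit the same volume
    used[v] -= bytes;
    return used[v];
  }

  public static void main(String[] args) {
    KeyedAccountingSketch s = new KeyedAccountingSketch();
    System.out.println(s.reserve("bp-1:blk_1", 48)); // 48 on volume 0
    System.out.println(s.reserve("bp-1:blk_2", 48)); // 48 on volume 1
    System.out.println(s.release("bp-1:blk_1", 48)); // volume 0 back to 0
  }
}
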

MappableBlockLoaderFactory.java (new file)

@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hdfs.server.datanode.DNConf;
+
+/**
+ * Creates MappableBlockLoader.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public final class MappableBlockLoaderFactory {
+
+  private MappableBlockLoaderFactory() {
+    // Prevent instantiation
+  }
+
+  /**
+   * Create a specific cache loader according to the configuration.
+   * If persistent memory volume is not configured, return a cache loader
+   * for DRAM cache. Otherwise, return a cache loader for pmem cache.
+   */
+  public static MappableBlockLoader createCacheLoader(DNConf conf) {
+    if (conf.getPmemVolumes() == null || conf.getPmemVolumes().length == 0) {
+      return new MemoryMappableBlockLoader();
+    }
+    return new PmemMappableBlockLoader();
+  }
+}
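
The factory above replaces the removed dfs.datanode.cache.loader.class setting: the loader is now inferred from whether any pmem directory is configured. A small illustrative sketch of the same rule, with strings standing in for the real loader classes:

// Illustrative only: mirrors the selection rule in MappableBlockLoaderFactory.
// The returned strings stand in for the real Hadoop loader classes.
public class LoaderSelectionSketch {
  static String chooseLoader(String[] pmemDirs) {
    // No pmem volume configured -> DRAM-backed loader; otherwise pmem loader.
    if (pmemDirs == null || pmemDirs.length == 0) {
      return "MemoryMappableBlockLoader";
    }
    return "PmemMappableBlockLoader";
  }

  public static void main(String[] args) {
    System.out.println(chooseLoader(new String[0]));               // DRAM loader
    System.out.println(chooseLoader(new String[]{"/mnt/pmem0"}));  // pmem loader
  }
}
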

MemoryMappableBlockLoader.java

@@ -22,11 +22,12 @@ import com.google.common.base.Preconditions;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.ExtendedBlockId;
 import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
 import org.apache.hadoop.io.nativeio.NativeIO;
 import org.apache.hadoop.util.DataChecksum;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;

 import java.io.BufferedInputStream;
 import java.io.DataInputStream;
@@ -42,11 +43,13 @@ import java.nio.channels.FileChannel;
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
 public class MemoryMappableBlockLoader extends MappableBlockLoader {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(MemoryMappableBlockLoader.class);
   private MemoryCacheStats memCacheStats;

   @Override
   void initialize(FsDatasetCache cacheManager) throws IOException {
+    LOG.info("Initializing cache loader: MemoryMappableBlockLoader.");
     this.memCacheStats = cacheManager.getMemCacheStats();
   }

@@ -148,11 +151,6 @@ public class MemoryMappableBlockLoader extends MappableBlockLoader {
     }
   }

-  @Override
-  public String getCacheCapacityConfigKey() {
-    return DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY;
-  }
-
   @Override
   public long getCacheUsed() {
     return memCacheStats.getCacheUsed();
@@ -164,12 +162,12 @@ public class MemoryMappableBlockLoader extends MappableBlockLoader {
   }

   @Override
-  long reserve(long bytesCount) {
+  long reserve(ExtendedBlockId key, long bytesCount) {
     return memCacheStats.reserve(bytesCount);
   }

   @Override
-  long release(long bytesCount) {
+  long release(ExtendedBlockId key, long bytesCount) {
     return memCacheStats.release(bytesCount);
   }

@@ -177,9 +175,4 @@ public class MemoryMappableBlockLoader extends MappableBlockLoader {
   public boolean isTransientCache() {
     return true;
   }
-
-  @Override
-  public String getCachedPath(ExtendedBlockId key) {
-    return null;
-  }
 }

PmemMappableBlockLoader.java

@@ -18,12 +18,10 @@

 package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl;

-import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.ExtendedBlockId;
 import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
 import org.apache.hadoop.hdfs.server.datanode.DNConf;
@@ -53,14 +51,10 @@ public class PmemMappableBlockLoader extends MappableBlockLoader {

   @Override
   void initialize(FsDatasetCache cacheManager) throws IOException {
+    LOG.info("Initializing cache loader: PmemMappableBlockLoader.");
     DNConf dnConf = cacheManager.getDnConf();
-    this.pmemVolumeManager = new PmemVolumeManager(dnConf.getMaxLockedPmem(),
-        dnConf.getPmemVolumes());
-  }
-
-  @VisibleForTesting
-  PmemVolumeManager getPmemVolumeManager() {
-    return pmemVolumeManager;
+    PmemVolumeManager.init(dnConf.getPmemVolumes());
+    pmemVolumeManager = PmemVolumeManager.getInstance();
   }

   /**
@@ -69,7 +63,7 @@ public class PmemMappableBlockLoader extends MappableBlockLoader {
    * Map the block and verify its checksum.
    *
    * The block will be mapped to PmemDir/BlockPoolId-BlockId, in which PmemDir
-   * is a persistent memory volume selected by getOneLocation() method.
+   * is a persistent memory volume chosen by PmemVolumeManager.
    *
    * @param length The current length of the block.
    * @param blockIn The block input stream. Should be positioned at the
@@ -100,8 +94,7 @@ public class PmemMappableBlockLoader extends MappableBlockLoader {
         throw new IOException("Block InputStream has no FileChannel.");
       }

-      Byte volumeIndex = pmemVolumeManager.getOneVolumeIndex();
-      filePath = pmemVolumeManager.inferCacheFilePath(volumeIndex, key);
+      filePath = pmemVolumeManager.getCachePath(key);
       file = new RandomAccessFile(filePath, "rw");
       out = file.getChannel().
           map(FileChannel.MapMode.READ_WRITE, 0, length);
@@ -111,9 +104,7 @@ public class PmemMappableBlockLoader extends MappableBlockLoader {
       }
       verifyChecksumAndMapBlock(out, length, metaIn, blockChannel,
           blockFileName);
-      mappableBlock = new PmemMappedBlock(
-          length, volumeIndex, key, pmemVolumeManager);
-      pmemVolumeManager.afterCache(key, volumeIndex);
+      mappableBlock = new PmemMappedBlock(length, key);
       LOG.info("Successfully cached one replica:{} into persistent memory"
           + ", [cached path={}, length={}]", key, filePath, length);
     } finally {
@@ -123,6 +114,7 @@ public class PmemMappableBlockLoader extends MappableBlockLoader {
       }
       IOUtils.closeQuietly(file);
       if (mappableBlock == null) {
+        LOG.debug("Delete {} due to unsuccessful mapping.", filePath);
         FsDatasetUtil.deleteMappedFile(filePath);
       }
     }
@@ -193,11 +185,6 @@ public class PmemMappableBlockLoader extends MappableBlockLoader {
     }
   }

-  @Override
-  public String getCacheCapacityConfigKey() {
-    return DFSConfigKeys.DFS_DATANODE_CACHE_PMEM_CAPACITY_KEY;
-  }
-
   @Override
   public long getCacheUsed() {
     return pmemVolumeManager.getCacheUsed();
@@ -209,13 +196,13 @@ public class PmemMappableBlockLoader extends MappableBlockLoader {
   }

   @Override
-  long reserve(long bytesCount) {
-    return pmemVolumeManager.reserve(bytesCount);
+  long reserve(ExtendedBlockId key, long bytesCount) {
+    return pmemVolumeManager.reserve(key, bytesCount);
   }

   @Override
-  long release(long bytesCount) {
-    return pmemVolumeManager.release(bytesCount);
+  long release(ExtendedBlockId key, long bytesCount) {
+    return pmemVolumeManager.release(key, bytesCount);
   }

   @Override
@@ -224,7 +211,8 @@ public class PmemMappableBlockLoader extends MappableBlockLoader {
   }

   @Override
-  public String getCachedPath(ExtendedBlockId key) {
-    return pmemVolumeManager.getCacheFilePath(key);
+  void shutdown() {
+    LOG.info("Clean up cache on persistent memory during shutdown.");
+    PmemVolumeManager.getInstance().cleanup();
   }
 }

PmemMappedBlock.java

@@ -35,18 +35,13 @@ import java.io.IOException;
 public class PmemMappedBlock implements MappableBlock {
   private static final Logger LOG =
       LoggerFactory.getLogger(PmemMappedBlock.class);
-  private final PmemVolumeManager pmemVolumeManager;
   private long length;
-  private Byte volumeIndex = null;
   private ExtendedBlockId key;

-  PmemMappedBlock(long length, Byte volumeIndex, ExtendedBlockId key,
-      PmemVolumeManager pmemVolumeManager) {
+  PmemMappedBlock(long length, ExtendedBlockId key) {
     assert length > 0;
     this.length = length;
-    this.volumeIndex = volumeIndex;
     this.key = key;
-    this.pmemVolumeManager = pmemVolumeManager;
   }

   @Override
@@ -57,10 +52,9 @@ public class PmemMappedBlock implements MappableBlock {
   @Override
   public void close() {
     String cacheFilePath =
-        pmemVolumeManager.inferCacheFilePath(volumeIndex, key);
+        PmemVolumeManager.getInstance().getCachePath(key);
     try {
       FsDatasetUtil.deleteMappedFile(cacheFilePath);
-      pmemVolumeManager.afterUncache(key);
       LOG.info("Successfully uncached one replica:{} from persistent memory"
           + ", [cached path={}, length={}]", key, cacheFilePath, length);
     } catch (IOException e) {

PmemVolumeManager.java

@@ -35,6 +35,7 @@ import java.io.RandomAccessFile;
 import java.nio.MappedByteBuffer;
 import java.nio.channels.FileChannel;
 import java.util.ArrayList;
+import java.util.List;
 import java.util.Map;
 import java.util.UUID;
 import java.util.concurrent.ConcurrentHashMap;
@@ -45,14 +46,19 @@ import java.util.concurrent.atomic.AtomicLong;
  */
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
-public class PmemVolumeManager {
+public final class PmemVolumeManager {

   /**
    * Counts used bytes for persistent memory.
    */
-  private class UsedBytesCount {
+  private static class UsedBytesCount {
+    private final long maxBytes;
     private final AtomicLong usedBytes = new AtomicLong(0);

+    UsedBytesCount(long maxBytes) {
+      this.maxBytes = maxBytes;
+    }
+
     /**
      * Try to reserve more bytes.
      *
@@ -65,7 +71,7 @@ public class PmemVolumeManager {
       while (true) {
         long cur = usedBytes.get();
         long next = cur + bytesCount;
-        if (next > cacheCapacity) {
+        if (next > maxBytes) {
           return -1;
         }
         if (usedBytes.compareAndSet(cur, next)) {
@@ -85,42 +91,76 @@
       return usedBytes.addAndGet(-bytesCount);
     }

-    long get() {
+    long getUsedBytes() {
       return usedBytes.get();
     }
+
+    long getMaxBytes() {
+      return maxBytes;
+    }
+
+    long getAvailableBytes() {
+      return maxBytes - usedBytes.get();
+    }
   }

   private static final Logger LOG =
       LoggerFactory.getLogger(PmemVolumeManager.class);
+  public static final String CACHE_DIR = "hdfs_pmem_cache";
+  private static PmemVolumeManager pmemVolumeManager = null;
   private final ArrayList<String> pmemVolumes = new ArrayList<>();
   // Maintain which pmem volume a block is cached to.
   private final Map<ExtendedBlockId, Byte> blockKeyToVolume =
       new ConcurrentHashMap<>();
-  private final UsedBytesCount usedBytesCount;
+  private final List<UsedBytesCount> usedBytesCounts = new ArrayList<>();

   /**
    * The total cache capacity in bytes of persistent memory.
-   * It is 0L if the specific mappableBlockLoader couldn't cache data to pmem.
    */
-  private final long cacheCapacity;
+  private long cacheCapacity;
+  private static long maxBytesPerPmem = -1;
   private int count = 0;
-  // Strict atomic operation is not guaranteed for the performance sake.
-  private int i = 0;
+  private byte nextIndex = 0;

-  PmemVolumeManager(long maxBytes, String[] pmemVolumesConfigured)
-      throws IOException {
-    if (pmemVolumesConfigured == null || pmemVolumesConfigured.length == 0) {
+  private PmemVolumeManager(String[] pmemVolumesConfig) throws IOException {
+    if (pmemVolumesConfig == null || pmemVolumesConfig.length == 0) {
       throw new IOException("The persistent memory volume, " +
           DFSConfigKeys.DFS_DATANODE_CACHE_PMEM_DIRS_KEY +
           " is not configured!");
     }
-    this.loadVolumes(pmemVolumesConfigured);
-    this.usedBytesCount = new UsedBytesCount();
-    this.cacheCapacity = maxBytes;
+    this.loadVolumes(pmemVolumesConfig);
+    cacheCapacity = 0L;
+    for (UsedBytesCount counter : usedBytesCounts) {
+      cacheCapacity += counter.getMaxBytes();
+    }
+  }
+
+  public synchronized static void init(String[] pmemVolumesConfig)
+      throws IOException {
+    if (pmemVolumeManager == null) {
+      pmemVolumeManager = new PmemVolumeManager(pmemVolumesConfig);
+    }
+  }
+
+  public static PmemVolumeManager getInstance() {
+    if (pmemVolumeManager == null) {
+      throw new RuntimeException(
+          "The pmemVolumeManager should be instantiated!");
+    }
+    return pmemVolumeManager;
+  }
+
+  @VisibleForTesting
+  public static void setMaxBytes(long maxBytes) {
+    maxBytesPerPmem = maxBytes;
   }

   public long getCacheUsed() {
-    return usedBytesCount.get();
+    long usedBytes = 0L;
+    for (UsedBytesCount counter : usedBytesCounts) {
+      usedBytes += counter.getUsedBytes();
+    }
+    return usedBytes;
   }

   public long getCacheCapacity() {
@@ -130,24 +170,40 @@
   /**
    * Try to reserve more bytes on persistent memory.
    *
+   * @param key The ExtendedBlockId for a block.
+   *
    * @param bytesCount The number of bytes to add.
    *
    * @return The new number of usedBytes if we succeeded;
    *         -1 if we failed.
    */
-  long reserve(long bytesCount) {
-    return usedBytesCount.reserve(bytesCount);
+  synchronized long reserve(ExtendedBlockId key, long bytesCount) {
+    try {
+      byte index = chooseVolume(bytesCount);
+      long usedBytes = usedBytesCounts.get(index).reserve(bytesCount);
+      // Put the entry into blockKeyToVolume if reserving bytes succeeded.
+      if (usedBytes > 0) {
+        blockKeyToVolume.put(key, index);
+      }
+      return usedBytes;
+    } catch (IOException e) {
+      LOG.warn(e.getMessage());
+      return -1L;
+    }
   }

   /**
    * Release some bytes that we're using on persistent memory.
    *
+   * @param key The ExtendedBlockId for a block.
+   *
    * @param bytesCount The number of bytes to release.
    *
    * @return The new number of usedBytes.
    */
-  long release(long bytesCount) {
-    return usedBytesCount.release(bytesCount);
+  long release(ExtendedBlockId key, long bytesCount) {
+    Byte index = blockKeyToVolume.remove(key);
+    return usedBytesCounts.get(index).release(bytesCount);
   }

   /**
@@ -155,46 +211,70 @@
    *
    * @throws IOException If there is no available pmem volume.
    */
-  private void loadVolumes(String[] volumes) throws IOException {
+  private void loadVolumes(String[] volumes)
+      throws IOException {
     // Check whether the volume exists
-    for (String volume: volumes) {
+    for (byte n = 0; n < volumes.length; n++) {
       try {
-        File pmemDir = new File(volume);
-        verifyIfValidPmemVolume(pmemDir);
-        // Remove all files under the volume.
-        FileUtils.cleanDirectory(pmemDir);
+        File pmemDir = new File(volumes[n]);
+        File realPmemDir = verifyIfValidPmemVolume(pmemDir);
+        this.pmemVolumes.add(realPmemDir.getPath());
+        long maxBytes;
+        if (maxBytesPerPmem == -1) {
+          maxBytes = realPmemDir.getUsableSpace();
+        } else {
+          maxBytes = maxBytesPerPmem;
+        }
+        UsedBytesCount usedBytesCount = new UsedBytesCount(maxBytes);
+        this.usedBytesCounts.add(usedBytesCount);
+        LOG.info("Added persistent memory - {} with size={}",
+            volumes[n], maxBytes);
       } catch (IllegalArgumentException e) {
-        LOG.error("Failed to parse persistent memory volume " + volume, e);
+        LOG.error("Failed to parse persistent memory volume " + volumes[n], e);
         continue;
       } catch (IOException e) {
-        LOG.error("Bad persistent memory volume: " + volume, e);
+        LOG.error("Bad persistent memory volume: " + volumes[n], e);
         continue;
       }
-      pmemVolumes.add(volume);
-      LOG.info("Added persistent memory - " + volume);
     }
     count = pmemVolumes.size();
     if (count == 0) {
       throw new IOException(
           "At least one valid persistent memory volume is required!");
     }
+    cleanup();
+  }
+
+  void cleanup() {
+    // Remove all files under the volume.
+    for (String pmemDir: pmemVolumes) {
+      try {
+        FileUtils.cleanDirectory(new File(pmemDir));
+      } catch (IOException e) {
+        LOG.error("Failed to clean up " + pmemDir, e);
+      }
+    }
   }

   @VisibleForTesting
-  static void verifyIfValidPmemVolume(File pmemDir)
+  static File verifyIfValidPmemVolume(File pmemDir)
       throws IOException {
     if (!pmemDir.exists()) {
       final String message = pmemDir + " does not exist";
       throw new IOException(message);
     }

     if (!pmemDir.isDirectory()) {
       final String message = pmemDir + " is not a directory";
       throw new IllegalArgumentException(message);
     }

+    File realPmemDir = new File(getRealPmemDir(pmemDir.getPath()));
+    if (!realPmemDir.exists() && !realPmemDir.mkdir()) {
+      throw new IOException("Failed to create " + realPmemDir.getPath());
+    }
+
     String uuidStr = UUID.randomUUID().toString();
-    String testFilePath = pmemDir.getPath() + "/.verify.pmem." + uuidStr;
+    String testFilePath = realPmemDir.getPath() + "/.verify.pmem." + uuidStr;
     byte[] contents = uuidStr.getBytes("UTF-8");
     RandomAccessFile testFile = null;
     MappedByteBuffer out = null;
@@ -203,15 +283,17 @@
       out = testFile.getChannel().map(FileChannel.MapMode.READ_WRITE, 0,
           contents.length);
       if (out == null) {
-        throw new IOException("Failed to map the test file under " + pmemDir);
+        throw new IOException(
+            "Failed to map the test file under " + realPmemDir);
       }
       out.put(contents);
       // Forces to write data to storage device containing the mapped file
       out.force();
+      return realPmemDir;
     } catch (IOException e) {
       throw new IOException(
           "Exception while writing data to persistent storage dir: " +
-              pmemDir, e);
+              realPmemDir, e);
     } finally {
       if (out != null) {
         out.clear();
@@ -229,18 +311,38 @@
       }
     }
   }

+  public static String getRealPmemDir(String rawPmemDir) {
+    return new File(rawPmemDir, CACHE_DIR).getAbsolutePath();
+  }
+
   /**
    * Choose a persistent memory volume based on a specific algorithm.
    * Currently it is a round-robin policy.
    *
    * TODO: Refine volume selection policy by considering storage utilization.
    */
-  Byte getOneVolumeIndex() throws IOException {
-    if (count != 0) {
-      return (byte)(i++ % count);
-    } else {
+  synchronized Byte chooseVolume(long bytesCount) throws IOException {
+    if (count == 0) {
       throw new IOException("No usable persistent memory is found");
     }
+    int k = 0;
+    long maxAvailableSpace = 0L;
+    while (k++ != count) {
+      if (nextIndex == count) {
+        nextIndex = 0;
+      }
+      byte index = nextIndex++;
+      long availableBytes = usedBytesCounts.get(index).getAvailableBytes();
+      if (availableBytes >= bytesCount) {
+        return index;
+      }
+      if (availableBytes > maxAvailableSpace) {
+        maxAvailableSpace = availableBytes;
+      }
+    }
+    throw new IOException("There is no enough persistent memory space " +
+        "for caching. The current max available space is " +
+        maxAvailableSpace + ", but " + bytesCount + "is required.");
   }

   @VisibleForTesting
@@ -276,7 +378,7 @@
   /**
    * The cache file path is pmemVolume/BlockPoolId-BlockId.
    */
-  public String getCacheFilePath(ExtendedBlockId key) {
+  public String getCachePath(ExtendedBlockId key) {
     Byte volumeIndex = blockKeyToVolume.get(key);
     if (volumeIndex == null) {
       return null;
@@ -288,19 +390,4 @@
   Map<ExtendedBlockId, Byte> getBlockKeyToVolume() {
     return blockKeyToVolume;
   }
-
-  /**
-   * Add cached block's ExtendedBlockId and its cache volume index to a map
-   * after cache.
-   */
-  public void afterCache(ExtendedBlockId key, Byte volumeIndex) {
-    blockKeyToVolume.put(key, volumeIndex);
-  }
-
-  /**
-   * Remove the record in blockKeyToVolume for uncached block after uncache.
-   */
-  public void afterUncache(ExtendedBlockId key) {
-    blockKeyToVolume.remove(key);
-  }
 }
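
chooseVolume() above walks the configured volumes round-robin, skips volumes that cannot hold the requested bytes, and only fails when none has room, reporting the largest available space it saw. A self-contained sketch of that selection loop under assumed per-volume free-space numbers (it does not model the byte accounting itself):

// Illustrative round-robin volume selection with a per-volume space check,
// in the spirit of PmemVolumeManager.chooseVolume(). Sizes are hypothetical.
import java.io.IOException;

public class RoundRobinChooserSketch {
  private final long[] availableBytes;  // assumed free space per volume
  private int nextIndex = 0;

  RoundRobinChooserSketch(long[] availableBytes) {
    this.availableBytes = availableBytes;
  }

  synchronized int chooseVolume(long bytesCount) throws IOException {
    long maxAvailable = 0;
    // Visit each volume at most once, continuing from the last position.
    for (int k = 0; k < availableBytes.length; k++) {
      if (nextIndex == availableBytes.length) {
        nextIndex = 0;
      }
      int index = nextIndex++;
      if (availableBytes[index] >= bytesCount) {
        return index;
      }
      maxAvailable = Math.max(maxAvailable, availableBytes[index]);
    }
    throw new IOException("No volume can hold " + bytesCount
        + " bytes; max available is " + maxAvailable);
  }

  public static void main(String[] args) throws IOException {
    RoundRobinChooserSketch c =
        new RoundRobinChooserSketch(new long[]{100, 30});
    System.out.println(c.chooseVolume(50));  // 0
    System.out.println(c.chooseVolume(20));  // 1 (round-robin moves on)
    System.out.println(c.chooseVolume(80));  // 0 (volume 1 is too small)
  }
}
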

hdfs-default.xml

@@ -2510,18 +2510,6 @@
   </description>
 </property>

-<property>
-  <name>dfs.datanode.cache.loader.class</name>
-  <value>org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.MemoryMappableBlockLoader</value>
-  <description>
-    Currently, the available cache loaders are MemoryMappableBlockLoader, PmemMappableBlockLoader.
-    By default, MemoryMappableBlockLoader is used and it maps block replica into memory.
-    PmemMappableBlockLoader can map block to persistent memory with mapped byte buffer, which is
-    implemented by Java code. The value of dfs.datanode.cache.pmem.dirs specifies the persistent
-    memory directory.
-  </description>
-</property>
-
 <property>
   <name>dfs.datanode.max.locked.memory</name>
   <value>0</value>
@@ -2538,18 +2526,6 @@
   </description>
 </property>

-<property>
-  <name>dfs.datanode.cache.pmem.capacity</name>
-  <value>0</value>
-  <description>
-    The amount of persistent memory in bytes that can be used to cache block
-    replicas to persistent memory. Currently, persistent memory is only enabled
-    in HDFS Centralized Cache Management feature.
-
-    By default, this parameter is 0, which disables persistent memory caching.
-  </description>
-</property>
-
 <property>
   <name>dfs.datanode.cache.pmem.dirs</name>
   <value></value>
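
With dfs.datanode.cache.loader.class and dfs.datanode.cache.pmem.capacity removed, enabling the pmem cache path comes down to configuring the pmem directories; capacity now defaults to each volume's usable space unless a test overrides it via PmemVolumeManager.setMaxBytes(). A minimal sketch mirroring the test setup in this commit; the mount points are placeholders:

// Minimal configuration sketch: only the pmem dirs key is needed now.
// The directory paths below are placeholders, not real mount points.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;

public class PmemCacheConfigExample {
  public static Configuration pmemCacheConf() {
    Configuration conf = new Configuration();
    // Comma-separated list of persistent memory directories.
    conf.set(DFSConfigKeys.DFS_DATANODE_CACHE_PMEM_DIRS_KEY,
        "/mnt/pmem0,/mnt/pmem1");
    return conf;
  }

  public static void main(String[] args) {
    Configuration conf = pmemCacheConf();
    System.out.println(
        conf.get(DFSConfigKeys.DFS_DATANODE_CACHE_PMEM_DIRS_KEY));
  }
}
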

TestCacheByPmemMappableBlockLoader.java

@@ -21,8 +21,6 @@ import org.apache.hadoop.hdfs.ExtendedBlockId;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector;

-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_CACHE_LOADER_CLASS;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_CACHE_PMEM_CAPACITY_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_CACHE_PMEM_DIRS_KEY;
 import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
 import static org.junit.Assert.assertEquals;
@@ -139,14 +137,11 @@ public class TestCacheByPmemMappableBlockLoader {
     conf.setInt(DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_KEY, 10);

     // Configuration for pmem cache
-    conf.set(DFS_DATANODE_CACHE_LOADER_CLASS,
-        "org.apache.hadoop.hdfs.server.datanode." +
-            "fsdataset.impl.PmemMappableBlockLoader");
     new File(PMEM_DIR_0).getAbsoluteFile().mkdir();
     new File(PMEM_DIR_1).getAbsoluteFile().mkdir();
     // Configure two bogus pmem volumes
     conf.set(DFS_DATANODE_CACHE_PMEM_DIRS_KEY, PMEM_DIR_0 + "," + PMEM_DIR_1);
-    conf.setLong(DFS_DATANODE_CACHE_PMEM_CAPACITY_KEY, CACHE_CAPACITY);
+    PmemVolumeManager.setMaxBytes((long) (CACHE_CAPACITY * 0.5));

     prevCacheManipulator = NativeIO.POSIX.getCacheManipulator();
     NativeIO.POSIX.setCacheManipulator(new NoMlockCacheManipulator());
@@ -183,18 +178,17 @@ public class TestCacheByPmemMappableBlockLoader {

   @Test
   public void testPmemVolumeManager() throws IOException {
-    PmemVolumeManager pmemVolumeManager =
-        cacheLoader.getPmemVolumeManager();
+    PmemVolumeManager pmemVolumeManager = PmemVolumeManager.getInstance();
     assertNotNull(pmemVolumeManager);
     assertEquals(CACHE_CAPACITY, pmemVolumeManager.getCacheCapacity());
     // Test round-robin selection policy
     long count1 = 0, count2 = 0;
     for (int i = 0; i < 10; i++) {
-      Byte index = pmemVolumeManager.getOneVolumeIndex();
+      Byte index = pmemVolumeManager.chooseVolume(BLOCK_SIZE);
       String volume = pmemVolumeManager.getVolumeByIndex(index);
-      if (volume.equals(PMEM_DIR_0)) {
+      if (volume.equals(PmemVolumeManager.getRealPmemDir(PMEM_DIR_0))) {
         count1++;
-      } else if (volume.equals(PMEM_DIR_1)) {
+      } else if (volume.equals(PmemVolumeManager.getRealPmemDir(PMEM_DIR_1))) {
         count2++;
       } else {
         fail("Unexpected persistent storage location:" + volume);
@@ -254,7 +248,7 @@ public class TestCacheByPmemMappableBlockLoader {
     // The pmem cache space is expected to have been used up.
     assertEquals(CACHE_CAPACITY, cacheManager.getPmemCacheUsed());
     Map<ExtendedBlockId, Byte> blockKeyToVolume =
-        cacheLoader.getPmemVolumeManager().getBlockKeyToVolume();
+        PmemVolumeManager.getInstance().getBlockKeyToVolume();
     // All block keys should be kept in blockKeyToVolume
     assertEquals(blockKeyToVolume.size(), maxCacheBlocksNum);
     assertTrue(blockKeyToVolume.keySet().containsAll(blockKeys));
@@ -266,11 +260,13 @@ public class TestCacheByPmemMappableBlockLoader {
     // to pmem.
     assertNotNull(cachePath);
     String expectFileName =
-        cacheLoader.getPmemVolumeManager().getCacheFileName(key);
+        PmemVolumeManager.getInstance().getCacheFileName(key);
     if (cachePath.startsWith(PMEM_DIR_0)) {
-      assertTrue(cachePath.equals(PMEM_DIR_0 + "/" + expectFileName));
+      assertTrue(cachePath.equals(PmemVolumeManager
+          .getRealPmemDir(PMEM_DIR_0) + "/" + expectFileName));
     } else if (cachePath.startsWith(PMEM_DIR_1)) {
-      assertTrue(cachePath.equals(PMEM_DIR_1 + "/" + expectFileName));
+      assertTrue(cachePath.equals(PmemVolumeManager
+          .getRealPmemDir(PMEM_DIR_1) + "/" + expectFileName));
     } else {
       fail("The cache path is not the expected one: " + cachePath);
     }

TestFsDatasetCache.java

@@ -401,9 +401,10 @@ public class TestFsDatasetCache {
     GenericTestUtils.waitFor(new Supplier<Boolean>() {
       @Override
       public Boolean get() {
+        // check the log reported by FsDataSetCache
+        // in the case that cache capacity is exceeded.
         int lines = appender.countLinesWithMessage(
-            "more bytes in the cache: " +
-            DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY);
+            "could not reserve more bytes in the cache: ");
         return lines > 0;
       }
     }, 500, 30000);