HDFS-6898. DN must reserve space for a full block when an RBW block is created. (Contributed by Arpit Agarwal)
commit d1fa58292e
parent cbea1b10ef
CHANGES.txt:
@@ -612,6 +612,9 @@ Release 2.6.0 - UNRELEASED
     HDFS-6862. Add missing timeout annotations to tests. (Xiaoyu Yao via
     Arpit Agarwal)
 
+    HDFS-6898. DN must reserve space for a full block when an RBW block is
+    created. (Arpit Agarwal)
+
   BREAKDOWN OF HDFS-6134 AND HADOOP-10150 SUBTASKS AND RELATED JIRAS
 
     HDFS-6387. HDFS CLI admin tool for creating & deleting an
HdfsConstants.java:
@@ -48,7 +48,7 @@ public class HdfsConstants {
       "org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol";
 
 
-  public static final int MIN_BLOCKS_FOR_WRITE = 5;
+  public static final int MIN_BLOCKS_FOR_WRITE = 1;
 
   // Long that indicates "leave current quota unchanged"
   public static final long QUOTA_DONT_SET = Long.MAX_VALUE;
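Note: MIN_BLOCKS_FOR_WRITE feeds the NameNode's space check when it picks write targets, where a node needs at least blockSize * MIN_BLOCKS_FOR_WRITE bytes remaining. The old multiplier of 5 was a coarse guard against writers exhausting a volume mid-block; with the DN now reserving a full block per RBW replica, 1 suffices. A minimal sketch of that check with illustrative names (the real check in BlockPlacementPolicyDefault also subtracts space already scheduled to the node):

// Minimal sketch, not the actual Hadoop implementation.
class TargetSpaceCheckSketch {
  static final int MIN_BLOCKS_FOR_WRITE = 1; // was 5 before this patch

  // Returns true if a node with 'remaining' free bytes may accept a
  // new block of 'blockSize' bytes.
  static boolean hasEnoughSpace(long remaining, long blockSize) {
    return remaining >= blockSize * MIN_BLOCKS_FOR_WRITE;
  }

  public static void main(String[] args) {
    long blockSize = 128L * 1024 * 1024;
    // Under the old multiplier this node (600 MB free, 640 MB required)
    // would be rejected; with RBW reservation handling the mid-write
    // safety on the DN, it qualifies.
    System.out.println(hasEnoughSpace(600L * 1024 * 1024, blockSize));
  }
}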
ReplicaBeingWritten.java:
@@ -34,10 +34,12 @@ public class ReplicaBeingWritten extends ReplicaInPipeline {
    * @param genStamp replica generation stamp
    * @param vol volume where replica is located
    * @param dir directory path where block and meta files are located
+   * @param bytesToReserve disk space to reserve for this replica, based on
+   *                       the estimated maximum block length.
    */
   public ReplicaBeingWritten(long blockId, long genStamp,
-      FsVolumeSpi vol, File dir) {
-    super( blockId, genStamp, vol, dir);
+      FsVolumeSpi vol, File dir, long bytesToReserve) {
+    super(blockId, genStamp, vol, dir, bytesToReserve);
   }
 
   /**
@@ -60,10 +62,12 @@ public class ReplicaBeingWritten extends ReplicaInPipeline {
    * @param vol volume where replica is located
    * @param dir directory path where block and meta files are located
    * @param writer a thread that is writing to this replica
+   * @param bytesToReserve disk space to reserve for this replica, based on
+   *                       the estimated maximum block length.
    */
   public ReplicaBeingWritten(long blockId, long len, long genStamp,
-      FsVolumeSpi vol, File dir, Thread writer ) {
-    super( blockId, len, genStamp, vol, dir, writer);
+      FsVolumeSpi vol, File dir, Thread writer, long bytesToReserve) {
+    super(blockId, len, genStamp, vol, dir, writer, bytesToReserve);
   }
 
   /**
ReplicaInPipeline.java:
@@ -45,16 +45,25 @@ public class ReplicaInPipeline extends ReplicaInfo
   private byte[] lastChecksum;
   private Thread writer;
 
+  /**
+   * Bytes reserved for this replica on the containing volume.
+   * Based off difference between the estimated maximum block length and
+   * the bytes already written to this block.
+   */
+  private long bytesReserved;
+
   /**
    * Constructor for a zero length replica
    * @param blockId block id
    * @param genStamp replica generation stamp
    * @param vol volume where replica is located
    * @param dir directory path where block and meta files are located
+   * @param bytesToReserve disk space to reserve for this replica, based on
+   *                       the estimated maximum block length.
    */
   public ReplicaInPipeline(long blockId, long genStamp,
-      FsVolumeSpi vol, File dir) {
-    this( blockId, 0L, genStamp, vol, dir, Thread.currentThread());
+      FsVolumeSpi vol, File dir, long bytesToReserve) {
+    this(blockId, 0L, genStamp, vol, dir, Thread.currentThread(), bytesToReserve);
   }
 
   /**
@@ -67,7 +76,7 @@ public class ReplicaInPipeline extends ReplicaInfo
   ReplicaInPipeline(Block block,
       FsVolumeSpi vol, File dir, Thread writer) {
     this( block.getBlockId(), block.getNumBytes(), block.getGenerationStamp(),
-        vol, dir, writer);
+        vol, dir, writer, 0L);
   }
 
   /**
@@ -78,13 +87,16 @@ public class ReplicaInPipeline extends ReplicaInfo
    * @param vol volume where replica is located
    * @param dir directory path where block and meta files are located
    * @param writer a thread that is writing to this replica
+   * @param bytesToReserve disk space to reserve for this replica, based on
+   *                       the estimated maximum block length.
    */
   ReplicaInPipeline(long blockId, long len, long genStamp,
-      FsVolumeSpi vol, File dir, Thread writer ) {
+      FsVolumeSpi vol, File dir, Thread writer, long bytesToReserve) {
     super( blockId, len, genStamp, vol, dir);
     this.bytesAcked = len;
     this.bytesOnDisk = len;
     this.writer = writer;
+    this.bytesReserved = bytesToReserve;
   }
 
   /**
@@ -96,6 +108,7 @@ public class ReplicaInPipeline extends ReplicaInfo
     this.bytesAcked = from.getBytesAcked();
     this.bytesOnDisk = from.getBytesOnDisk();
     this.writer = from.writer;
+    this.bytesReserved = from.bytesReserved;
   }
 
   @Override
@@ -115,7 +128,14 @@ public class ReplicaInPipeline extends ReplicaInfo
 
   @Override // ReplicaInPipelineInterface
   public void setBytesAcked(long bytesAcked) {
+    long newBytesAcked = bytesAcked - this.bytesAcked;
     this.bytesAcked = bytesAcked;
+
+    // Once bytes are ACK'ed we can release equivalent space from the
+    // volume's reservedForRbw count. We could have released it as soon
+    // as the write-to-disk completed but that would be inefficient.
+    getVolume().releaseReservedSpace(newBytesAcked);
+    bytesReserved -= newBytesAcked;
   }
 
   @Override // ReplicaInPipelineInterface
@@ -123,6 +143,11 @@ public class ReplicaInPipeline extends ReplicaInfo
     return bytesOnDisk;
   }
 
+  @Override
+  public long getBytesReserved() {
+    return bytesReserved;
+  }
+
   @Override // ReplicaInPipelineInterface
   public synchronized void setLastChecksumAndDataLen(long dataLength, byte[] lastChecksum) {
     this.bytesOnDisk = dataLength;
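The heart of the accounting is setBytesAcked(): reserved space is released lazily as bytes are acknowledged, keeping the invariant that bytesReserved equals the estimated block length minus the bytes acked so far. A self-contained sketch of that invariant, with illustrative names rather than the actual Hadoop classes:

// Self-contained sketch of the per-replica invariant; not Hadoop code.
class RbwReservationSketch {
  private long bytesAcked;
  private long bytesReserved;

  RbwReservationSketch(long estimatedBlockLength) {
    bytesAcked = 0;
    bytesReserved = estimatedBlockLength; // full block reserved up front
  }

  // Mirrors ReplicaInPipeline.setBytesAcked(): release reservation to
  // match the newly acknowledged bytes.
  void setBytesAcked(long newTotalAcked) {
    long delta = newTotalAcked - bytesAcked;
    bytesAcked = newTotalAcked;
    bytesReserved -= delta; // the volume releases the same delta
  }

  public static void main(String[] args) {
    RbwReservationSketch r = new RbwReservationSketch(1024 * 1024);
    r.setBytesAcked(300_000);
    // invariant: reserved == estimated length - bytes acked
    System.out.println(r.bytesReserved == 1024 * 1024 - 300_000);
  }
}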
ReplicaInfo.java:
@@ -223,6 +223,13 @@ abstract public class ReplicaInfo extends Block implements Replica {
     // no need to be unlinked
   }
 
+  /**
+   * Number of bytes reserved for this replica on disk.
+   */
+  public long getBytesReserved() {
+    return 0;
+  }
+
   /**
    * Copy specified file into a temporary file. Then rename the
    * temporary file to the original name. This will cause any
FsVolumeSpi.java:
@@ -45,4 +45,15 @@ public interface FsVolumeSpi {
   public File getFinalizedDir(String bpid) throws IOException;
 
   public StorageType getStorageType();
+
+  /**
+   * Reserve disk space for an RBW block so a writer does not run out of
+   * space before the block is full.
+   */
+  public void reserveSpaceForRbw(long bytesToReserve);
+
+  /**
+   * Release disk space previously reserved for RBW block.
+   */
+  public void releaseReservedSpace(long bytesToRelease);
 }
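These two methods imply a simple pairing contract for callers: reserve once when the RBW replica is created, release incrementally as acks arrive, and release the remainder at finalization or abort. A hedged sketch of that contract with stand-in types (illustrative only, not Hadoop code):

// Stand-in for FsVolumeSpi's two new methods.
interface VolumeSketch {
  void reserveSpaceForRbw(long bytesToReserve);
  void releaseReservedSpace(long bytesToRelease);
}

class RbwWriterSketch {
  // packetSizes stands in for the acked packet lengths of one block write.
  static void writeBlock(VolumeSketch volume, long blockSize, long[] packetSizes) {
    volume.reserveSpaceForRbw(blockSize);     // at RBW creation
    long stillReserved = blockSize;
    try {
      for (long packet : packetSizes) {
        volume.releaseReservedSpace(packet);  // release as bytes are acked
        stillReserved -= packet;
      }
    } finally {
      // finalize or abort: release whatever is still reserved
      volume.releaseReservedSpace(stillReserved);
    }
  }
}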
BlockPoolSlice.java:
@@ -240,7 +240,7 @@ class BlockPoolSlice {
     return DatanodeUtil.createTmpFile(b, f);
   }
 
-  File addBlock(Block b, File f) throws IOException {
+  File addFinalizedBlock(Block b, File f) throws IOException {
     File blockDir = DatanodeUtil.idToBlockDir(finalizedDir, b.getBlockId());
     if (!blockDir.exists()) {
       if (!blockDir.mkdirs()) {
@@ -334,9 +334,11 @@ class BlockPoolSlice {
         // The restart meta file exists
         if (sc.hasNextLong() && (sc.nextLong() > Time.now())) {
           // It didn't expire. Load the replica as a RBW.
+          // We don't know the expected block length, so just use 0
+          // and don't reserve any more space for writes.
           newReplica = new ReplicaBeingWritten(blockId,
               validateIntegrityAndSetLength(file, genStamp),
-              genStamp, volume, file.getParentFile(), null);
+              genStamp, volume, file.getParentFile(), null, 0);
           loadRwr = false;
         }
         sc.close();
FsDatasetImpl.java:
@@ -593,7 +593,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
           + " from " + srcfile + " to " + dstfile.getAbsolutePath(), e);
     }
     if (LOG.isDebugEnabled()) {
-      LOG.debug("addBlock: Moved " + srcmeta + " to " + dstmeta
+      LOG.debug("addFinalizedBlock: Moved " + srcmeta + " to " + dstmeta
           + " and " + srcfile + " to " + dstfile);
     }
     return dstfile;
@@ -712,7 +712,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
     File oldmeta = replicaInfo.getMetaFile();
     ReplicaBeingWritten newReplicaInfo = new ReplicaBeingWritten(
         replicaInfo.getBlockId(), replicaInfo.getNumBytes(), newGS,
-        v, newBlkFile.getParentFile(), Thread.currentThread());
+        v, newBlkFile.getParentFile(), Thread.currentThread(), estimateBlockLen);
     File newmeta = newReplicaInfo.getMetaFile();
 
     // rename meta file to rbw directory
@@ -748,7 +748,8 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
 
     // Replace finalized replica by a RBW replica in replicas map
     volumeMap.add(bpid, newReplicaInfo);
+    v.reserveSpaceForRbw(estimateBlockLen - replicaInfo.getNumBytes());
     return newReplicaInfo;
   }
 
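Note that the append path reserves only the unwritten remainder, estimateBlockLen - replicaInfo.getNumBytes(), since the finalized bytes already occupy disk. Illustrative arithmetic (made-up values):

// Illustrative arithmetic for the append-path reservation above.
class AppendReservationSketch {
  public static void main(String[] args) {
    long estimateBlockLen = 128L * 1024 * 1024; // estimated final block length
    long numBytes = 48L * 1024 * 1024;          // bytes already finalized on disk
    long toReserve = estimateBlockLen - numBytes;
    System.out.println("reserve " + toReserve + " bytes"); // 80 MB, not 128 MB
  }
}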
FsDatasetImpl.java (continued):
@@ -876,7 +876,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
     // create a rbw file to hold block in the designated volume
     File f = v.createRbwFile(b.getBlockPoolId(), b.getLocalBlock());
     ReplicaBeingWritten newReplicaInfo = new ReplicaBeingWritten(b.getBlockId(),
-        b.getGenerationStamp(), v, f.getParentFile());
+        b.getGenerationStamp(), v, f.getParentFile(), b.getNumBytes());
     volumeMap.add(b.getBlockPoolId(), newReplicaInfo);
     return newReplicaInfo;
   }
@@ -992,7 +992,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
     // create RBW
     final ReplicaBeingWritten rbw = new ReplicaBeingWritten(
         blockId, numBytes, expectedGs,
-        v, dest.getParentFile(), Thread.currentThread());
+        v, dest.getParentFile(), Thread.currentThread(), 0);
     rbw.setBytesAcked(visible);
     // overwrite the RBW in the volume map
     volumeMap.add(b.getBlockPoolId(), rbw);
@@ -1013,7 +1013,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
     // create a temporary file to hold block in the designated volume
     File f = v.createTmpFile(b.getBlockPoolId(), b.getLocalBlock());
     ReplicaInPipeline newReplicaInfo = new ReplicaInPipeline(b.getBlockId(),
-        b.getGenerationStamp(), v, f.getParentFile());
+        b.getGenerationStamp(), v, f.getParentFile(), 0);
     volumeMap.add(b.getBlockPoolId(), newReplicaInfo);
 
     return newReplicaInfo;
@@ -1079,7 +1079,8 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
             " for block " + replicaInfo);
       }
 
-      File dest = v.addBlock(bpid, replicaInfo, f);
+      File dest = v.addFinalizedBlock(
+          bpid, replicaInfo, f, replicaInfo.getBytesReserved());
       newReplicaInfo = new FinalizedReplica(replicaInfo, v, dest.getParentFile());
     }
     volumeMap.add(bpid, newReplicaInfo);
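Taken together, the call sites above reserve different amounts depending on how a replica comes into being. A compact recap in code, with illustrative constants:

// Summary of reservation amounts by replica-creation path (illustrative).
class ReservationByPathSketch {
  public static void main(String[] args) {
    long blockSize = 128L * 1024 * 1024;
    long finalizedLen = 48L * 1024 * 1024;

    long createRbw = blockSize;             // new RBW: full expected length
    long append = blockSize - finalizedLen; // append: unwritten remainder
    long recoverRbw = 0;                    // recovery paths: length unknown
    long createTemporary = 0;               // temporary replicas: no reservation

    System.out.printf("createRbw=%d append=%d recover=%d temp=%d%n",
        createRbw, append, recoverRbw, createTemporary);
  }
}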
FsVolumeImpl.java:
@@ -28,6 +28,7 @@ import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.ThreadFactory;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
 
 import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -62,6 +63,9 @@ public class FsVolumeImpl implements FsVolumeSpi {
   private final DF usage;
   private final long reserved;
 
+  // Disk space reserved for open blocks.
+  private AtomicLong reservedForRbw;
+
   // Capacity configured. This is useful when we want to
   // limit the visible capacity for tests. If negative, then we just
   // query from the filesystem.
@@ -82,6 +86,7 @@ public class FsVolumeImpl implements FsVolumeSpi {
     this.reserved = conf.getLong(
         DFSConfigKeys.DFS_DATANODE_DU_RESERVED_KEY,
         DFSConfigKeys.DFS_DATANODE_DU_RESERVED_DEFAULT);
+    this.reservedForRbw = new AtomicLong(0L);
     this.currentDir = currentDir;
     File parent = currentDir.getParentFile();
     this.usage = new DF(parent, conf);
@@ -166,7 +171,7 @@ public class FsVolumeImpl implements FsVolumeSpi {
 
   @Override
   public long getAvailable() throws IOException {
-    long remaining = getCapacity()-getDfsUsed();
+    long remaining = getCapacity() - getDfsUsed() - reservedForRbw.get();
     long available = usage.getAvailable();
     if (remaining > available) {
       remaining = available;
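Because getAvailable() now subtracts reservedForRbw, space promised to open writers becomes invisible to the NameNode, so it cannot over-schedule new blocks into it. A toy illustration of the adjusted computation, with made-up values:

// Toy illustration of the adjusted available-space computation.
class AvailableSpaceSketch {
  public static void main(String[] args) {
    long capacity = 10L * 1024 * 1024 * 1024;  // 10 GB volume
    long dfsUsed = 6L * 1024 * 1024 * 1024;    // 6 GB of finalized blocks
    long reservedForRbw = 512L * 1024 * 1024;  // 512 MB promised to open writers

    long remaining = capacity - dfsUsed - reservedForRbw;
    // The volume advertises 3.5 GB instead of 4 GB, keeping the space
    // already spoken for out of placement decisions.
    System.out.println(remaining);
  }
}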
FsVolumeImpl.java (continued):
@@ -174,6 +179,11 @@ public class FsVolumeImpl implements FsVolumeSpi {
     return (remaining > 0) ? remaining : 0;
   }
 
+  @VisibleForTesting
+  public long getReservedForRbw() {
+    return reservedForRbw.get();
+  }
+
   long getReserved(){
     return reserved;
   }
@@ -217,16 +227,58 @@ public class FsVolumeImpl implements FsVolumeSpi {
     return getBlockPoolSlice(bpid).createTmpFile(b);
   }
 
+  @Override
+  public void reserveSpaceForRbw(long bytesToReserve) {
+    if (bytesToReserve != 0) {
+      if (FsDatasetImpl.LOG.isDebugEnabled()) {
+        FsDatasetImpl.LOG.debug("Reserving " + bytesToReserve + " on volume " + getBasePath());
+      }
+      reservedForRbw.addAndGet(bytesToReserve);
+    }
+  }
+
+  @Override
+  public void releaseReservedSpace(long bytesToRelease) {
+    if (bytesToRelease != 0) {
+      if (FsDatasetImpl.LOG.isDebugEnabled()) {
+        FsDatasetImpl.LOG.debug("Releasing " + bytesToRelease + " on volume " + getBasePath());
+      }
+
+      long oldReservation, newReservation;
+      do {
+        oldReservation = reservedForRbw.get();
+        newReservation = oldReservation - bytesToRelease;
+        if (newReservation < 0) {
+          // Failsafe, this should never occur in practice, but if it does
+          // we don't want to start advertising more space than we have
+          // available.
+          newReservation = 0;
+        }
+      } while (!reservedForRbw.compareAndSet(oldReservation, newReservation));
+    }
+  }
+
   /**
    * RBW files. They get moved to the finalized block directory when
    * the block is finalized.
    */
   File createRbwFile(String bpid, Block b) throws IOException {
+    reserveSpaceForRbw(b.getNumBytes());
     return getBlockPoolSlice(bpid).createRbwFile(b);
   }
 
-  File addBlock(String bpid, Block b, File f) throws IOException {
-    return getBlockPoolSlice(bpid).addBlock(b, f);
+  /**
+   * @param bytesReservedForRbw Space that was reserved during
+   *     block creation. Now that the block is being finalized we
+   *     can free up this space.
+   * @return
+   * @throws IOException
+   */
+  File addFinalizedBlock(String bpid, Block b,
+                         File f, long bytesReservedForRbw)
+      throws IOException {
+    releaseReservedSpace(bytesReservedForRbw);
+    return getBlockPoolSlice(bpid).addFinalizedBlock(b, f);
   }
 
   Executor getCacheExecutor() {
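releaseReservedSpace() uses a compare-and-set loop so the counter is clamped at zero rather than going negative on an over-release; a plain addAndGet(-n) would let a double-release make the volume advertise space it does not have. The clamping pattern in isolation, as a self-contained sketch:

import java.util.concurrent.atomic.AtomicLong;

// The clamped CAS decrement in isolation; mirrors releaseReservedSpace().
class ClampedCounterSketch {
  final AtomicLong value = new AtomicLong();

  void subtractClamped(long n) {
    long oldV, newV;
    do {
      oldV = value.get();
      newV = Math.max(0, oldV - n); // never drop below zero
    } while (!value.compareAndSet(oldV, newV));
  }

  public static void main(String[] args) {
    ClampedCounterSketch c = new ClampedCounterSketch();
    c.value.addAndGet(100);
    c.subtractClamped(150);            // over-release is clamped...
    System.out.println(c.value.get()); // ...prints 0, not -50
  }
}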
TestDirectoryScanner.java:
@@ -424,6 +424,14 @@ public class TestDirectoryScanner {
     public String getStorageID() {
       return "";
     }
+
+    @Override
+    public void reserveSpaceForRbw(long bytesToReserve) {
+    }
+
+    @Override
+    public void releaseReservedSpace(long bytesToRelease) {
+    }
   }
 
   private final static TestFsVolumeSpi TEST_VOLUME = new TestFsVolumeSpi();
TestRbwSpaceReservation.java (new file):
@@ -0,0 +1,288 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.impl.Log4JLogger;
+import org.apache.hadoop.conf.Configuration;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.*;
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertThat;
+
+import org.apache.hadoop.fs.DU;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.*;
+import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.util.Daemon;
+import org.apache.log4j.Level;
+import org.junit.After;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.List;
+import java.util.Random;
+
+/**
+ * Ensure that the DN reserves disk space equivalent to a full block for
+ * replica being written (RBW).
+ */
+public class TestRbwSpaceReservation {
+  static final Log LOG = LogFactory.getLog(TestRbwSpaceReservation.class);
+
+  private static final short REPL_FACTOR = 1;
+  private static final int DU_REFRESH_INTERVAL_MSEC = 500;
+  private static final int STORAGES_PER_DATANODE = 1;
+  private static final int BLOCK_SIZE = 1024 * 1024;
+  private static final int SMALL_BLOCK_SIZE = 1024;
+
+  protected MiniDFSCluster cluster;
+  private Configuration conf;
+  private DistributedFileSystem fs = null;
+  private DFSClient client = null;
+  FsVolumeImpl singletonVolume = null;
+
+  private static Random rand = new Random();
+
+  private void initConfig(int blockSize) {
+    conf = new HdfsConfiguration();
+
+    // Refresh disk usage information frequently.
+    conf.setInt(FS_DU_INTERVAL_KEY, DU_REFRESH_INTERVAL_MSEC);
+    conf.setLong(DFS_BLOCK_SIZE_KEY, blockSize);
+
+    // Disable the scanner
+    conf.setInt(DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, -1);
+  }
+
+  static {
+    ((Log4JLogger) FsDatasetImpl.LOG).getLogger().setLevel(Level.ALL);
+  }
+
+  private void startCluster(int blockSize, long perVolumeCapacity) throws IOException {
+    initConfig(blockSize);
+
+    cluster = new MiniDFSCluster
+        .Builder(conf)
+        .storagesPerDatanode(STORAGES_PER_DATANODE)
+        .numDataNodes(REPL_FACTOR)
+        .build();
+    fs = cluster.getFileSystem();
+    client = fs.getClient();
+    cluster.waitActive();
+
+    if (perVolumeCapacity >= 0) {
+      List<? extends FsVolumeSpi> volumes =
+          cluster.getDataNodes().get(0).getFSDataset().getVolumes();
+
+      assertThat(volumes.size(), is(1));
+      singletonVolume = ((FsVolumeImpl) volumes.get(0));
+      singletonVolume.setCapacityForTesting(perVolumeCapacity);
+    }
+  }
+
+  @After
+  public void shutdownCluster() throws IOException {
+    if (client != null) {
+      client.close();
+      client = null;
+    }
+
+    if (fs != null) {
+      fs.close();
+      fs = null;
+    }
+
+    if (cluster != null) {
+      cluster.shutdown();
+      cluster = null;
+    }
+  }
+
+  private void createFileAndTestSpaceReservation(
+      final String fileNamePrefix, final int fileBlockSize)
+      throws IOException, InterruptedException {
+    // Enough for 1 block + meta files + some delta.
+    final long configuredCapacity = fileBlockSize * 2 - 1;
+    startCluster(BLOCK_SIZE, configuredCapacity);
+    FSDataOutputStream out = null;
+    Path path = new Path("/" + fileNamePrefix + ".dat");
+
+    try {
+      out = fs.create(path, false, 4096, (short) 1, fileBlockSize);
+
+      byte[] buffer = new byte[rand.nextInt(fileBlockSize / 4)];
+      out.write(buffer);
+      out.hsync();
+      int bytesWritten = buffer.length;
+
+      // Check that space was reserved for a full block minus the bytesWritten.
+      assertThat(singletonVolume.getReservedForRbw(),
+          is((long) fileBlockSize - bytesWritten));
+      out.close();
+      out = null;
+
+      // Check that the reserved space has been released since we closed the
+      // file.
+      assertThat(singletonVolume.getReservedForRbw(), is(0L));
+
+      // Reopen the file for appends and write 1 more byte.
+      out = fs.append(path);
+      out.write(buffer);
+      out.hsync();
+      bytesWritten += buffer.length;
+
+      // Check that space was again reserved for a full block minus the
+      // bytesWritten so far.
+      assertThat(singletonVolume.getReservedForRbw(),
+          is((long) fileBlockSize - bytesWritten));
+
+      // Write once again and again verify the available space. This ensures
+      // that the reserved space is progressively adjusted to account for bytes
+      // written to disk.
+      out.write(buffer);
+      out.hsync();
+      bytesWritten += buffer.length;
+      assertThat(singletonVolume.getReservedForRbw(),
+          is((long) fileBlockSize - bytesWritten));
+    } finally {
+      if (out != null) {
+        out.close();
+      }
+    }
+  }
+
+  @Test (timeout=300000)
+  public void testWithDefaultBlockSize()
+      throws IOException, InterruptedException {
+    createFileAndTestSpaceReservation(GenericTestUtils.getMethodName(), BLOCK_SIZE);
+  }
+
+  @Test (timeout=300000)
+  public void testWithNonDefaultBlockSize()
+      throws IOException, InterruptedException {
+    // Same test as previous one, but with a non-default block size.
+    createFileAndTestSpaceReservation(GenericTestUtils.getMethodName(), BLOCK_SIZE * 2);
+  }
+
+  /**
+   * Stress test to ensure we are not leaking reserved space.
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  @Test (timeout=600000)
+  public void stressTest() throws IOException, InterruptedException {
+    final int numWriters = 5;
+    startCluster(SMALL_BLOCK_SIZE, SMALL_BLOCK_SIZE * numWriters * 10);
+    Writer[] writers = new Writer[numWriters];
+
+    // Start a few writers and let them run for a while.
+    for (int i = 0; i < numWriters; ++i) {
+      writers[i] = new Writer(client, SMALL_BLOCK_SIZE);
+      writers[i].start();
+    }
+
+    Thread.sleep(60000);
+
+    // Stop the writers.
+    for (Writer w : writers) {
+      w.stopWriter();
+    }
+    int filesCreated = 0;
+    int numFailures = 0;
+    for (Writer w : writers) {
+      w.join();
+      filesCreated += w.getFilesCreated();
+      numFailures += w.getNumFailures();
+    }
+
+    LOG.info("Stress test created " + filesCreated +
+        " files and hit " + numFailures + " failures");
+
+    // Check no space was leaked.
+    assertThat(singletonVolume.getReservedForRbw(), is(0L));
+  }
+
+  private static class Writer extends Daemon {
+    private volatile boolean keepRunning;
+    private final DFSClient localClient;
+    private int filesCreated = 0;
+    private int numFailures = 0;
+    byte[] data;
+
+    Writer(DFSClient client, int blockSize) throws IOException {
+      localClient = client;
+      keepRunning = true;
+      filesCreated = 0;
+      numFailures = 0;
+
+      // At least some of the files should span a block boundary.
+      data = new byte[blockSize * 2];
+    }
+
+    @Override
+    public void run() {
+      /**
+       * Create a file, write up to 3 blocks of data and close the file.
+       * Do this in a loop until we are told to stop.
+       */
+      while (keepRunning) {
+        OutputStream os = null;
+        try {
+          String filename = "/file-" + rand.nextLong();
+          os = localClient.create(filename, false);
+          os.write(data, 0, rand.nextInt(data.length));
+          IOUtils.closeQuietly(os);
+          os = null;
+          localClient.delete(filename, false);
+          Thread.sleep(50);     // Sleep for a bit to avoid killing the system.
+          ++filesCreated;
+        } catch (IOException ioe) {
+          // Just ignore the exception and keep going.
+          ++numFailures;
+        } catch (InterruptedException ie) {
+          return;
+        } finally {
+          if (os != null) {
+            IOUtils.closeQuietly(os);
+          }
+        }
+      }
+    }
+
+    public void stopWriter() {
+      keepRunning = false;
+    }
+
+    public int getFilesCreated() {
+      return filesCreated;
+    }
+
+    public int getNumFailures() {
+      return numFailures;
+    }
+  }
+}
TestWriteToReplica.java:
@@ -158,7 +158,7 @@ public class TestWriteToReplica {
     replicasMap.add(bpid, new ReplicaInPipeline(
         blocks[TEMPORARY].getBlockId(),
         blocks[TEMPORARY].getGenerationStamp(), vol,
-        vol.createTmpFile(bpid, blocks[TEMPORARY].getLocalBlock()).getParentFile()));
+        vol.createTmpFile(bpid, blocks[TEMPORARY].getLocalBlock()).getParentFile(), 0));
 
     replicaInfo = new ReplicaBeingWritten(blocks[RBW].getLocalBlock(), vol,
         vol.createRbwFile(bpid, blocks[RBW].getLocalBlock()).getParentFile(), null);