HDFS-4213. Add an API to hsync for updating the last block length at the namenode. Contributed by Jing Zhao
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1415799 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d866f81edb
commit
571da54179
|
@ -389,6 +389,9 @@ Release 2.0.3-alpha - Unreleased
|
|||
|
||||
HDFS-4155. libhdfs implementation of hsync API (Liang Xie via todd)
|
||||
|
||||
HDFS-4213. Add an API to hsync for updating the last block length at the
|
||||
namenode. (Jing Zhao via szetszwo)
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
HDFS-3925. Prettify PipelineAck#toString() for printing to a log
|
||||
|
|
|
@ -46,6 +46,7 @@ import org.apache.hadoop.fs.Syncable;
|
|||
import org.apache.hadoop.fs.UnresolvedLinkException;
|
||||
import org.apache.hadoop.fs.permission.FsPermission;
|
||||
import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
|
||||
import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag;
|
||||
import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException;
|
||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||
|
@ -1487,9 +1488,14 @@ public class DFSOutputStream extends FSOutputSummer implements Syncable {
|
|||
*/
|
||||
@Override
|
||||
public void hflush() throws IOException {
|
||||
flushOrSync(false);
|
||||
flushOrSync(false, EnumSet.noneOf(SyncFlag.class));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void hsync() throws IOException {
|
||||
hsync(EnumSet.noneOf(SyncFlag.class));
|
||||
}
|
||||
|
||||
/**
|
||||
* The expected semantics is all data have flushed out to all replicas
|
||||
* and all replicas have done posix fsync equivalent - ie the OS has
|
||||
|
@ -1498,17 +1504,35 @@ public class DFSOutputStream extends FSOutputSummer implements Syncable {
|
|||
* Note that only the current block is flushed to the disk device.
|
||||
* To guarantee durable sync across block boundaries the stream should
|
||||
* be created with {@link CreateFlag#SYNC_BLOCK}.
|
||||
*
|
||||
* @param syncFlags
|
||||
* Indicate the semantic of the sync. Currently used to specify
|
||||
* whether or not to update the block length in NameNode.
|
||||
*/
|
||||
@Override
|
||||
public void hsync() throws IOException {
|
||||
flushOrSync(true);
|
||||
public void hsync(EnumSet<SyncFlag> syncFlags) throws IOException {
|
||||
flushOrSync(true, syncFlags);
|
||||
}
|
||||
|
||||
private void flushOrSync(boolean isSync) throws IOException {
|
||||
/**
|
||||
* Flush/Sync buffered data to DataNodes.
|
||||
*
|
||||
* @param isSync
|
||||
* Whether or not to require all replicas to flush data to the disk
|
||||
* device
|
||||
* @param syncFlags
|
||||
* Indicate extra detailed semantic of the flush/sync. Currently
|
||||
* mainly used to specify whether or not to update the file length in
|
||||
* the NameNode
|
||||
* @throws IOException
|
||||
*/
|
||||
private void flushOrSync(boolean isSync, EnumSet<SyncFlag> syncFlags)
|
||||
throws IOException {
|
||||
dfsClient.checkOpen();
|
||||
isClosed();
|
||||
try {
|
||||
long toWaitFor;
|
||||
long lastBlockLength = -1L;
|
||||
boolean updateLength = syncFlags.contains(SyncFlag.UPDATE_LENGTH);
|
||||
synchronized (this) {
|
||||
/* Record current blockOffset. This might be changed inside
|
||||
* flushBuffer() where a partial checksum chunk might be flushed.
|
||||
|
@ -1572,13 +1596,20 @@ public class DFSOutputStream extends FSOutputSummer implements Syncable {
|
|||
} // end synchronized
|
||||
|
||||
waitForAckedSeqno(toWaitFor);
|
||||
|
||||
// If any new blocks were allocated since the last flush,
|
||||
// then persist block locations on namenode.
|
||||
//
|
||||
if (persistBlocks.getAndSet(false)) {
|
||||
|
||||
if (updateLength) {
|
||||
synchronized (this) {
|
||||
if (streamer != null && streamer.block != null) {
|
||||
lastBlockLength = streamer.block.getNumBytes();
|
||||
}
|
||||
}
|
||||
}
|
||||
// If 1) any new blocks were allocated since the last flush, or 2) to
|
||||
// update length in NN is requried, then persist block locations on
|
||||
// namenode.
|
||||
if (persistBlocks.getAndSet(false) || updateLength) {
|
||||
try {
|
||||
dfsClient.namenode.fsync(src, dfsClient.clientName);
|
||||
dfsClient.namenode.fsync(src, dfsClient.clientName, lastBlockLength);
|
||||
} catch (IOException ioe) {
|
||||
DFSClient.LOG.warn("Unable to persist blocks in hflush for " + src, ioe);
|
||||
// If we got an error here, it might be because some other thread called
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
package org.apache.hadoop.hdfs.client;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.EnumSet;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.classification.InterfaceStability;
|
||||
|
@ -56,4 +57,24 @@ public class HdfsDataOutputStream extends FSDataOutputStream {
|
|||
public synchronized int getCurrentBlockReplication() throws IOException {
|
||||
return ((DFSOutputStream)getWrappedStream()).getCurrentBlockReplication();
|
||||
}
|
||||
|
||||
/**
|
||||
* Sync buffered data to DataNodes (flush to disk devices).
|
||||
*
|
||||
* @param syncFlags
|
||||
* Indicate the detailed semantic and actions of the hsync.
|
||||
* @throws IOException
|
||||
* @see FSDataOutputStream#hsync()
|
||||
*/
|
||||
public void hsync(EnumSet<SyncFlag> syncFlags) throws IOException {
|
||||
((DFSOutputStream) getWrappedStream()).hsync(syncFlags);
|
||||
}
|
||||
|
||||
public static enum SyncFlag {
|
||||
/**
|
||||
* When doing sync to DataNodes, also update the metadata (block
|
||||
* length) in the NameNode
|
||||
*/
|
||||
UPDATE_LENGTH;
|
||||
}
|
||||
}
|
|
@ -815,14 +815,15 @@ public interface ClientProtocol {
|
|||
* The file must be currently open for writing.
|
||||
* @param src The string representation of the path
|
||||
* @param client The string representation of the client
|
||||
*
|
||||
* @param lastBlockLength The length of the last block (under construction)
|
||||
* to be reported to NameNode
|
||||
* @throws AccessControlException permission denied
|
||||
* @throws FileNotFoundException file <code>src</code> is not found
|
||||
* @throws UnresolvedLinkException if <code>src</code> contains a symlink.
|
||||
* @throws IOException If an I/O error occurred
|
||||
*/
|
||||
@Idempotent
|
||||
public void fsync(String src, String client)
|
||||
public void fsync(String src, String client, long lastBlockLength)
|
||||
throws AccessControlException, FileNotFoundException,
|
||||
UnresolvedLinkException, IOException;
|
||||
|
||||
|
|
|
@ -688,7 +688,7 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
|
|||
public FsyncResponseProto fsync(RpcController controller,
|
||||
FsyncRequestProto req) throws ServiceException {
|
||||
try {
|
||||
server.fsync(req.getSrc(), req.getClient());
|
||||
server.fsync(req.getSrc(), req.getClient(), req.getLastBlockLength());
|
||||
return VOID_FSYNC_RESPONSE;
|
||||
} catch (IOException e) {
|
||||
throw new ServiceException(e);
|
||||
|
|
|
@ -659,12 +659,11 @@ public class ClientNamenodeProtocolTranslatorPB implements
|
|||
}
|
||||
|
||||
@Override
|
||||
public void fsync(String src, String client) throws AccessControlException,
|
||||
FileNotFoundException, UnresolvedLinkException, IOException {
|
||||
FsyncRequestProto req = FsyncRequestProto.newBuilder()
|
||||
.setSrc(src)
|
||||
.setClient(client)
|
||||
.build();
|
||||
public void fsync(String src, String client, long lastBlockLength)
|
||||
throws AccessControlException, FileNotFoundException,
|
||||
UnresolvedLinkException, IOException {
|
||||
FsyncRequestProto req = FsyncRequestProto.newBuilder().setSrc(src)
|
||||
.setClient(client).setLastBlockLength(lastBlockLength).build();
|
||||
try {
|
||||
rpcProxy.fsync(null, req);
|
||||
} catch (ServiceException e) {
|
||||
|
|
|
@ -2979,9 +2979,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
/** Persist all metadata about this file.
|
||||
* @param src The string representation of the path
|
||||
* @param clientName The string representation of the client
|
||||
* @param lastBlockLength The length of the last block
|
||||
* under construction reported from client.
|
||||
* @throws IOException if path does not exist
|
||||
*/
|
||||
void fsync(String src, String clientName)
|
||||
void fsync(String src, String clientName, long lastBlockLength)
|
||||
throws IOException, UnresolvedLinkException {
|
||||
NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName);
|
||||
writeLock();
|
||||
|
@ -2991,6 +2993,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
throw new SafeModeException("Cannot fsync file " + src, safeMode);
|
||||
}
|
||||
INodeFileUnderConstruction pendingFile = checkLease(src, clientName);
|
||||
if (lastBlockLength > 0) {
|
||||
pendingFile.updateLengthOfLastBlock(lastBlockLength);
|
||||
}
|
||||
dir.persistBlocks(src, pendingFile);
|
||||
} finally {
|
||||
writeUnlock();
|
||||
|
|
|
@ -171,4 +171,22 @@ class INodeFileUnderConstruction extends INodeFile implements MutableBlockCollec
|
|||
setBlock(numBlocks()-1, ucBlock);
|
||||
return ucBlock;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the length for the last block
|
||||
*
|
||||
* @param lastBlockLength
|
||||
* The length of the last block reported from client
|
||||
* @throws IOException
|
||||
*/
|
||||
void updateLengthOfLastBlock(long lastBlockLength) throws IOException {
|
||||
BlockInfo lastBlock = this.getLastBlock();
|
||||
assert (lastBlock != null) : "The last block for path "
|
||||
+ this.getFullPathName() + " is null when updating its length";
|
||||
assert (lastBlock instanceof BlockInfoUnderConstruction) : "The last block for path "
|
||||
+ this.getFullPathName()
|
||||
+ " is not a BlockInfoUnderConstruction when updating its length";
|
||||
lastBlock.setNumBytes(lastBlockLength);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -825,8 +825,9 @@ class NameNodeRpcServer implements NamenodeProtocols {
|
|||
}
|
||||
|
||||
@Override // ClientProtocol
|
||||
public void fsync(String src, String clientName) throws IOException {
|
||||
namesystem.fsync(src, clientName);
|
||||
public void fsync(String src, String clientName, long lastBlockLength)
|
||||
throws IOException {
|
||||
namesystem.fsync(src, clientName, lastBlockLength);
|
||||
}
|
||||
|
||||
@Override // ClientProtocol
|
||||
|
|
|
@ -357,6 +357,7 @@ message SetQuotaResponseProto { // void response
|
|||
message FsyncRequestProto {
|
||||
required string src = 1;
|
||||
required string client = 2;
|
||||
optional sint64 lastBlockLength = 3 [default = -1];
|
||||
}
|
||||
|
||||
message FsyncResponseProto { // void response
|
||||
|
|
|
@ -23,12 +23,14 @@ import static org.junit.Assert.assertTrue;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.InterruptedIOException;
|
||||
import java.util.EnumSet;
|
||||
|
||||
import org.apache.commons.logging.impl.Log4JLogger;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag;
|
||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||
import org.apache.log4j.Level;
|
||||
import org.junit.Test;
|
||||
|
@ -43,16 +45,21 @@ public class TestHFlush {
|
|||
|
||||
private final String fName = "hflushtest.dat";
|
||||
|
||||
/** The test uses {@link #doTheJob(Configuration, String, long, short)
|
||||
/**
|
||||
* The test uses
|
||||
* {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)}
|
||||
* to write a file with a standard block size
|
||||
*/
|
||||
@Test
|
||||
public void hFlush_01() throws IOException {
|
||||
doTheJob(new HdfsConfiguration(), fName, AppendTestUtil.BLOCK_SIZE, (short)2);
|
||||
doTheJob(new HdfsConfiguration(), fName, AppendTestUtil.BLOCK_SIZE,
|
||||
(short) 2, false, EnumSet.noneOf(SyncFlag.class));
|
||||
}
|
||||
|
||||
/** The test uses {@link #doTheJob(Configuration, String, long, short)
|
||||
* to write a file with a custom block size so the writes will be
|
||||
/**
|
||||
* The test uses
|
||||
* {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)}
|
||||
* to write a file with a custom block size so the writes will be
|
||||
* happening across block' boundaries
|
||||
*/
|
||||
@Test
|
||||
|
@ -64,14 +71,17 @@ public class TestHFlush {
|
|||
conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
|
||||
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);
|
||||
|
||||
doTheJob(conf, fName, customBlockSize, (short)2);
|
||||
doTheJob(conf, fName, customBlockSize, (short) 2, false,
|
||||
EnumSet.noneOf(SyncFlag.class));
|
||||
}
|
||||
|
||||
/** The test uses {@link #doTheJob(Configuration, String, long, short)
|
||||
* to write a file with a custom block size so the writes will be
|
||||
/**
|
||||
* The test uses
|
||||
* {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)}
|
||||
* to write a file with a custom block size so the writes will be
|
||||
* happening across block's and checksum' boundaries
|
||||
*/
|
||||
@Test
|
||||
@Test
|
||||
public void hFlush_03() throws IOException {
|
||||
Configuration conf = new HdfsConfiguration();
|
||||
int customPerChecksumSize = 400;
|
||||
|
@ -80,22 +90,106 @@ public class TestHFlush {
|
|||
conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
|
||||
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);
|
||||
|
||||
doTheJob(conf, fName, customBlockSize, (short)2);
|
||||
doTheJob(conf, fName, customBlockSize, (short) 2, false,
|
||||
EnumSet.noneOf(SyncFlag.class));
|
||||
}
|
||||
|
||||
/**
|
||||
The method starts new cluster with defined Configuration;
|
||||
creates a file with specified block_size and writes 10 equal sections in it;
|
||||
it also calls hflush() after each write and throws an IOException in case of
|
||||
an error.
|
||||
@param conf cluster configuration
|
||||
@param fileName of the file to be created and processed as required
|
||||
@param block_size value to be used for the file's creation
|
||||
@param replicas is the number of replicas
|
||||
@throws IOException in case of any errors
|
||||
* Test hsync (with updating block length in NameNode) while no data is
|
||||
* actually written yet
|
||||
*/
|
||||
@Test
|
||||
public void hSyncUpdateLength_00() throws IOException {
|
||||
Configuration conf = new HdfsConfiguration();
|
||||
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(
|
||||
2).build();
|
||||
DistributedFileSystem fileSystem =
|
||||
(DistributedFileSystem)cluster.getFileSystem();
|
||||
|
||||
try {
|
||||
Path path = new Path(fName);
|
||||
FSDataOutputStream stm = fileSystem.create(path, true, 4096, (short) 2,
|
||||
AppendTestUtil.BLOCK_SIZE);
|
||||
System.out.println("Created file " + path.toString());
|
||||
((DFSOutputStream) stm.getWrappedStream()).hsync(EnumSet
|
||||
.of(SyncFlag.UPDATE_LENGTH));
|
||||
long currentFileLength = fileSystem.getFileStatus(path).getLen();
|
||||
assertEquals(0L, currentFileLength);
|
||||
stm.close();
|
||||
} finally {
|
||||
fileSystem.close();
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The test calls
|
||||
* {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)}
|
||||
* while requiring the semantic of {@link SyncFlag#UPDATE_LENGTH}.
|
||||
*/
|
||||
@Test
|
||||
public void hSyncUpdateLength_01() throws IOException {
|
||||
doTheJob(new HdfsConfiguration(), fName, AppendTestUtil.BLOCK_SIZE,
|
||||
(short) 2, true, EnumSet.of(SyncFlag.UPDATE_LENGTH));
|
||||
}
|
||||
|
||||
/**
|
||||
* The test calls
|
||||
* {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)}
|
||||
* while requiring the semantic of {@link SyncFlag#UPDATE_LENGTH}.
|
||||
* Similar with {@link #hFlush_02()} , it writes a file with a custom block
|
||||
* size so the writes will be happening across block' boundaries
|
||||
*/
|
||||
@Test
|
||||
public void hSyncUpdateLength_02() throws IOException {
|
||||
Configuration conf = new HdfsConfiguration();
|
||||
int customPerChecksumSize = 512;
|
||||
int customBlockSize = customPerChecksumSize * 3;
|
||||
// Modify defaul filesystem settings
|
||||
conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
|
||||
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);
|
||||
|
||||
doTheJob(conf, fName, customBlockSize, (short) 2, true,
|
||||
EnumSet.of(SyncFlag.UPDATE_LENGTH));
|
||||
}
|
||||
|
||||
/**
|
||||
* The test calls
|
||||
* {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)}
|
||||
* while requiring the semantic of {@link SyncFlag#UPDATE_LENGTH}.
|
||||
* Similar with {@link #hFlush_03()} , it writes a file with a custom block
|
||||
* size so the writes will be happening across block's and checksum'
|
||||
* boundaries.
|
||||
*/
|
||||
@Test
|
||||
public void hSyncUpdateLength_03() throws IOException {
|
||||
Configuration conf = new HdfsConfiguration();
|
||||
int customPerChecksumSize = 400;
|
||||
int customBlockSize = customPerChecksumSize * 3;
|
||||
// Modify defaul filesystem settings
|
||||
conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
|
||||
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);
|
||||
|
||||
doTheJob(conf, fName, customBlockSize, (short) 2, true,
|
||||
EnumSet.of(SyncFlag.UPDATE_LENGTH));
|
||||
}
|
||||
|
||||
/**
|
||||
* The method starts new cluster with defined Configuration; creates a file
|
||||
* with specified block_size and writes 10 equal sections in it; it also calls
|
||||
* hflush/hsync after each write and throws an IOException in case of an error.
|
||||
*
|
||||
* @param conf cluster configuration
|
||||
* @param fileName of the file to be created and processed as required
|
||||
* @param block_size value to be used for the file's creation
|
||||
* @param replicas is the number of replicas
|
||||
* @param isSync hsync or hflush
|
||||
* @param syncFlags specify the semantic of the sync/flush
|
||||
* @throws IOException in case of any errors
|
||||
*/
|
||||
public static void doTheJob(Configuration conf, final String fileName,
|
||||
long block_size, short replicas) throws IOException {
|
||||
long block_size, short replicas, boolean isSync,
|
||||
EnumSet<SyncFlag> syncFlags) throws IOException {
|
||||
byte[] fileContent;
|
||||
final int SECTIONS = 10;
|
||||
|
||||
|
@ -119,8 +213,21 @@ public class TestHFlush {
|
|||
System.out.println("Writing " + (tenth * i) + " to " + (tenth * (i+1)) + " section to file " + fileName);
|
||||
// write to the file
|
||||
stm.write(fileContent, tenth * i, tenth);
|
||||
// Wait while hflush() pushes all packets through built pipeline
|
||||
((DFSOutputStream)stm.getWrappedStream()).hflush();
|
||||
|
||||
// Wait while hflush/hsync pushes all packets through built pipeline
|
||||
if (isSync) {
|
||||
((DFSOutputStream)stm.getWrappedStream()).hsync(syncFlags);
|
||||
} else {
|
||||
((DFSOutputStream)stm.getWrappedStream()).hflush();
|
||||
}
|
||||
|
||||
// Check file length if updatelength is required
|
||||
if (isSync && syncFlags.contains(SyncFlag.UPDATE_LENGTH)) {
|
||||
long currentFileLength = fileSystem.getFileStatus(path).getLen();
|
||||
assertEquals(
|
||||
"File size doesn't match for hsync/hflush with updating the length",
|
||||
tenth * (i + 1), currentFileLength);
|
||||
}
|
||||
byte [] toRead = new byte[tenth];
|
||||
byte [] expected = new byte[tenth];
|
||||
System.arraycopy(fileContent, tenth * i, expected, 0, tenth);
|
||||
|
@ -139,8 +246,6 @@ public class TestHFlush {
|
|||
|
||||
assertEquals("File size doesn't match ", AppendTestUtil.FILE_SIZE, fileSystem.getFileStatus(path).getLen());
|
||||
AppendTestUtil.checkFullFile(fileSystem, path, fileContent.length, fileContent, "hflush()");
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
fileSystem.close();
|
||||
cluster.shutdown();
|
||||
|
|
Loading…
Reference in New Issue