HDFS-6970. Move startFile EDEK retries to the DFSClient. (wang)

(cherry picked from commit 20a076bafc)

Conflicts:
	hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
This commit is contained in:
Andrew Wang 2014-09-18 17:35:24 -07:00
parent d329237990
commit ef693b541c
5 changed files with 122 additions and 91 deletions

View File

@ -225,6 +225,8 @@ Release 2.6.0 - UNRELEASED
HDFS-6727. Refresh data volumes on DataNode based on configuration changes HDFS-6727. Refresh data volumes on DataNode based on configuration changes
(Lei Xu via cmccabe) (Lei Xu via cmccabe)
HDFS-6970. Move startFile EDEK retries to the DFSClient. (wang)
OPTIMIZATIONS OPTIMIZATIONS
HDFS-6690. Deduplicate xattr names in memory. (wang) HDFS-6690. Deduplicate xattr names in memory. (wang)

View File

@ -41,6 +41,7 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.atomic.AtomicReference;
import com.google.common.base.Preconditions;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.crypto.CipherSuite; import org.apache.hadoop.crypto.CipherSuite;
import org.apache.hadoop.fs.CanSetDropBehind; import org.apache.hadoop.fs.CanSetDropBehind;
@ -76,6 +77,7 @@ import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException; import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy; import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
import org.apache.hadoop.hdfs.server.namenode.NotReplicatedYetException; import org.apache.hadoop.hdfs.server.namenode.NotReplicatedYetException;
import org.apache.hadoop.hdfs.server.namenode.RetryStartFileException;
import org.apache.hadoop.hdfs.server.namenode.SafeModeException; import org.apache.hadoop.hdfs.server.namenode.SafeModeException;
import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.io.EnumSetWritable;
import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.IOUtils;
@ -127,6 +129,13 @@ public class DFSOutputStream extends FSOutputSummer
implements Syncable, CanSetDropBehind { implements Syncable, CanSetDropBehind {
private final long dfsclientSlowLogThresholdMs; private final long dfsclientSlowLogThresholdMs;
private static final int MAX_PACKETS = 80; // each packet 64K, total 5MB private static final int MAX_PACKETS = 80; // each packet 64K, total 5MB
/**
* Number of times to retry creating a file when there are transient
* errors (typically related to encryption zones and KeyProvider operations).
*/
@VisibleForTesting
public static final int CREATE_RETRY_COUNT = 10;
private final DFSClient dfsClient; private final DFSClient dfsClient;
private Socket s; private Socket s;
// closed is accessed by different threads under different locks. // closed is accessed by different threads under different locks.
@ -1648,23 +1657,46 @@ public class DFSOutputStream extends FSOutputSummer
short replication, long blockSize, Progressable progress, int buffersize, short replication, long blockSize, Progressable progress, int buffersize,
DataChecksum checksum, String[] favoredNodes, DataChecksum checksum, String[] favoredNodes,
List<CipherSuite> cipherSuites) throws IOException { List<CipherSuite> cipherSuites) throws IOException {
final HdfsFileStatus stat; HdfsFileStatus stat = null;
try {
stat = dfsClient.namenode.create(src, masked, dfsClient.clientName, // Retry the create if we get a RetryStartFileException up to a maximum
new EnumSetWritable<CreateFlag>(flag), createParent, replication, // number of times
blockSize, cipherSuites); boolean shouldRetry = true;
} catch(RemoteException re) { int retryCount = CREATE_RETRY_COUNT;
throw re.unwrapRemoteException(AccessControlException.class, while (shouldRetry) {
DSQuotaExceededException.class, shouldRetry = false;
FileAlreadyExistsException.class, try {
FileNotFoundException.class, stat = dfsClient.namenode.create(src, masked, dfsClient.clientName,
ParentNotDirectoryException.class, new EnumSetWritable<CreateFlag>(flag), createParent, replication,
NSQuotaExceededException.class, blockSize, cipherSuites);
SafeModeException.class, break;
UnresolvedPathException.class, } catch (RemoteException re) {
SnapshotAccessControlException.class, IOException e = re.unwrapRemoteException(
UnknownCipherSuiteException.class); AccessControlException.class,
DSQuotaExceededException.class,
FileAlreadyExistsException.class,
FileNotFoundException.class,
ParentNotDirectoryException.class,
NSQuotaExceededException.class,
RetryStartFileException.class,
SafeModeException.class,
UnresolvedPathException.class,
SnapshotAccessControlException.class,
UnknownCipherSuiteException.class);
if (e instanceof RetryStartFileException) {
if (retryCount > 0) {
shouldRetry = true;
retryCount--;
} else {
throw new IOException("Too many retries because of encryption" +
" zone operations", e);
}
} else {
throw e;
}
}
} }
Preconditions.checkNotNull(stat, "HdfsFileStatus should not be null!");
final DFSOutputStream out = new DFSOutputStream(dfsClient, src, stat, final DFSOutputStream out = new DFSOutputStream(dfsClient, src, stat,
flag, progress, checksum, favoredNodes); flag, progress, checksum, favoredNodes);
out.start(); out.start();

View File

@ -2448,84 +2448,66 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
waitForLoadingFSImage(); waitForLoadingFSImage();
/* /**
* We want to avoid holding any locks while doing KeyProvider operations, * If the file is in an encryption zone, we optimistically create an
* since they can be very slow. Since the path can * EDEK for the file by calling out to the configured KeyProvider.
* flip flop between being in an encryption zone and not in the meantime, * Since this typically involves doing an RPC, we take the readLock
* we need to recheck the preconditions and redo KeyProvider operations * initially, then drop it to do the RPC.
* in some situations. *
* * Since the path can flip-flop between being in an encryption zone and not
* A special RetryStartFileException is used to indicate that we should * in the meantime, we need to recheck the preconditions when we retake the
* retry creation of a FileEncryptionInfo. * lock to do the create. If the preconditions are not met, we throw a
* special RetryStartFileException to ask the DFSClient to try the create
* again later.
*/ */
BlocksMapUpdateInfo toRemoveBlocks = null; CipherSuite suite = null;
String ezKeyName = null;
readLock();
try { try {
boolean shouldContinue = true; src = resolvePath(src, pathComponents);
int iters = 0; INodesInPath iip = dir.getINodesInPath4Write(src);
while (shouldContinue) { // Nothing to do if the path is not within an EZ
skipSync = false; if (dir.isInAnEZ(iip)) {
if (iters >= 10) { suite = chooseCipherSuite(iip, cipherSuites);
throw new IOException("Too many retries because of encryption zone " + if (suite != null) {
"operations, something might be broken!"); Preconditions.checkArgument(!suite.equals(CipherSuite.UNKNOWN),
} "Chose an UNKNOWN CipherSuite!");
shouldContinue = false;
iters++;
// Optimistically determine CipherSuite and ezKeyName if the path is
// currently within an encryption zone
CipherSuite suite = null;
String ezKeyName = null;
readLock();
try {
src = resolvePath(src, pathComponents);
INodesInPath iip = dir.getINodesInPath4Write(src);
// Nothing to do if the path is not within an EZ
if (dir.isInAnEZ(iip)) {
suite = chooseCipherSuite(iip, cipherSuites);
if (suite != null) {
Preconditions.checkArgument(!suite.equals(CipherSuite.UNKNOWN),
"Chose an UNKNOWN CipherSuite!");
}
ezKeyName = dir.getKeyName(iip);
Preconditions.checkState(ezKeyName != null);
}
} finally {
readUnlock();
}
Preconditions.checkState(
(suite == null && ezKeyName == null) ||
(suite != null && ezKeyName != null),
"Both suite and ezKeyName should both be null or not null");
// Generate EDEK if necessary while not holding the lock
EncryptedKeyVersion edek =
generateEncryptedDataEncryptionKey(ezKeyName);
EncryptionFaultInjector.getInstance().startFileAfterGenerateKey();
// Try to create the file with the computed cipher suite and EDEK
writeLock();
try {
checkOperation(OperationCategory.WRITE);
checkNameNodeSafeMode("Cannot create file" + src);
src = resolvePath(src, pathComponents);
toRemoveBlocks = startFileInternal(pc, src, permissions, holder,
clientMachine, create, overwrite, createParent, replication,
blockSize, suite, edek, logRetryCache);
stat = dir.getFileInfo(src, false,
FSDirectory.isReservedRawName(srcArg));
} catch (StandbyException se) {
skipSync = true;
throw se;
} catch (RetryStartFileException e) {
shouldContinue = true;
if (LOG.isTraceEnabled()) {
LOG.trace("Preconditions failed, retrying creation of " +
"FileEncryptionInfo", e);
}
} finally {
writeUnlock();
} }
ezKeyName = dir.getKeyName(iip);
Preconditions.checkState(ezKeyName != null);
} }
} finally { } finally {
readUnlock();
}
Preconditions.checkState(
(suite == null && ezKeyName == null) ||
(suite != null && ezKeyName != null),
"Both suite and ezKeyName should both be null or not null");
// Generate EDEK if necessary while not holding the lock
EncryptedKeyVersion edek =
generateEncryptedDataEncryptionKey(ezKeyName);
EncryptionFaultInjector.getInstance().startFileAfterGenerateKey();
// Proceed with the create, using the computed cipher suite and
// generated EDEK
BlocksMapUpdateInfo toRemoveBlocks = null;
writeLock();
try {
checkOperation(OperationCategory.WRITE);
checkNameNodeSafeMode("Cannot create file" + src);
src = resolvePath(src, pathComponents);
toRemoveBlocks = startFileInternal(pc, src, permissions, holder,
clientMachine, create, overwrite, createParent, replication,
blockSize, suite, edek, logRetryCache);
stat = dir.getFileInfo(src, false,
FSDirectory.isReservedRawName(srcArg));
} catch (StandbyException se) {
skipSync = true;
throw se;
} finally {
writeUnlock();
// There might be transactions logged while trying to recover the lease. // There might be transactions logged while trying to recover the lease.
// They need to be sync'ed even when an exception was thrown. // They need to be sync'ed even when an exception was thrown.
if (!skipSync) { if (!skipSync) {

View File

@ -17,5 +17,20 @@
*/ */
package org.apache.hadoop.hdfs.server.namenode; package org.apache.hadoop.hdfs.server.namenode;
public class RetryStartFileException extends Exception { import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
@InterfaceAudience.Private
public class RetryStartFileException extends IOException {
private static final long serialVersionUID = 1L;
public RetryStartFileException() {
super("Preconditions for creating a file failed because of a " +
"transient error, retry create later.");
}
public RetryStartFileException(String s) {
super(s);
}
} }

View File

@ -940,7 +940,7 @@ public class TestEncryptionZones {
Future<?> future = executor.submit(new CreateFileTask(fsWrapper, file)); Future<?> future = executor.submit(new CreateFileTask(fsWrapper, file));
// Flip-flop between two EZs to repeatedly fail // Flip-flop between two EZs to repeatedly fail
for (int i=0; i<10; i++) { for (int i=0; i<DFSOutputStream.CREATE_RETRY_COUNT+1; i++) {
injector.ready.await(); injector.ready.await();
fsWrapper.delete(zone1, true); fsWrapper.delete(zone1, true);
fsWrapper.mkdir(zone1, FsPermission.getDirDefault(), true); fsWrapper.mkdir(zone1, FsPermission.getDirDefault(), true);