HDFS-6970. Move startFile EDEK retries to the DFSClient. (wang)

This commit is contained in:
Andrew Wang 2014-09-18 17:35:24 -07:00
parent fe38d2e9b5
commit 20a076bafc
5 changed files with 122 additions and 91 deletions

View File

@ -562,6 +562,8 @@ Release 2.6.0 - UNRELEASED
HDFS-6727. Refresh data volumes on DataNode based on configuration changes
(Lei Xu via cmccabe)
HDFS-6970. Move startFile EDEK retries to the DFSClient. (wang)
OPTIMIZATIONS
HDFS-6690. Deduplicate xattr names in memory. (wang)

View File

@ -41,6 +41,7 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import com.google.common.base.Preconditions;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.crypto.CipherSuite;
import org.apache.hadoop.fs.CanSetDropBehind;
@ -76,6 +77,7 @@ import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
import org.apache.hadoop.hdfs.server.namenode.NotReplicatedYetException;
import org.apache.hadoop.hdfs.server.namenode.RetryStartFileException;
import org.apache.hadoop.hdfs.server.namenode.SafeModeException;
import org.apache.hadoop.io.EnumSetWritable;
import org.apache.hadoop.io.IOUtils;
@ -126,6 +128,13 @@ import com.google.common.cache.RemovalNotification;
public class DFSOutputStream extends FSOutputSummer
implements Syncable, CanSetDropBehind {
private static final int MAX_PACKETS = 80; // each packet 64K, total 5MB
/**
* Number of times to retry creating a file when there are transient
* errors (typically related to encryption zones and KeyProvider operations).
*/
@VisibleForTesting
public static final int CREATE_RETRY_COUNT = 10;
private final DFSClient dfsClient;
private final long dfsclientSlowLogThresholdMs;
private Socket s;
@ -1648,23 +1657,46 @@ public class DFSOutputStream extends FSOutputSummer
short replication, long blockSize, Progressable progress, int buffersize,
DataChecksum checksum, String[] favoredNodes,
List<CipherSuite> cipherSuites) throws IOException {
final HdfsFileStatus stat;
HdfsFileStatus stat = null;
// Retry the create if we get a RetryStartFileException up to a maximum
// number of times
boolean shouldRetry = true;
int retryCount = CREATE_RETRY_COUNT;
while (shouldRetry) {
shouldRetry = false;
try {
stat = dfsClient.namenode.create(src, masked, dfsClient.clientName,
new EnumSetWritable<CreateFlag>(flag), createParent, replication,
blockSize, cipherSuites);
break;
} catch (RemoteException re) {
throw re.unwrapRemoteException(AccessControlException.class,
IOException e = re.unwrapRemoteException(
AccessControlException.class,
DSQuotaExceededException.class,
FileAlreadyExistsException.class,
FileNotFoundException.class,
ParentNotDirectoryException.class,
NSQuotaExceededException.class,
RetryStartFileException.class,
SafeModeException.class,
UnresolvedPathException.class,
SnapshotAccessControlException.class,
UnknownCipherSuiteException.class);
if (e instanceof RetryStartFileException) {
if (retryCount > 0) {
shouldRetry = true;
retryCount--;
} else {
throw new IOException("Too many retries because of encryption" +
" zone operations", e);
}
} else {
throw e;
}
}
}
Preconditions.checkNotNull(stat, "HdfsFileStatus should not be null!");
final DFSOutputStream out = new DFSOutputStream(dfsClient, src, stat,
flag, progress, checksum, favoredNodes);
out.start();

View File

@ -2490,31 +2490,18 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
waitForLoadingFSImage();
/*
* We want to avoid holding any locks while doing KeyProvider operations,
* since they can be very slow. Since the path can
* flip flop between being in an encryption zone and not in the meantime,
* we need to recheck the preconditions and redo KeyProvider operations
* in some situations.
/**
* If the file is in an encryption zone, we optimistically create an
* EDEK for the file by calling out to the configured KeyProvider.
* Since this typically involves doing an RPC, we take the readLock
* initially, then drop it to do the RPC.
*
* A special RetryStartFileException is used to indicate that we should
* retry creation of a FileEncryptionInfo.
* Since the path can flip-flop between being in an encryption zone and not
* in the meantime, we need to recheck the preconditions when we retake the
* lock to do the create. If the preconditions are not met, we throw a
* special RetryStartFileException to ask the DFSClient to try the create
* again later.
*/
BlocksMapUpdateInfo toRemoveBlocks = null;
try {
boolean shouldContinue = true;
int iters = 0;
while (shouldContinue) {
skipSync = false;
if (iters >= 10) {
throw new IOException("Too many retries because of encryption zone " +
"operations, something might be broken!");
}
shouldContinue = false;
iters++;
// Optimistically determine CipherSuite and ezKeyName if the path is
// currently within an encryption zone
CipherSuite suite = null;
String ezKeyName = null;
readLock();
@ -2539,11 +2526,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
(suite == null && ezKeyName == null) ||
(suite != null && ezKeyName != null),
"Both suite and ezKeyName should both be null or not null");
// Generate EDEK if necessary while not holding the lock
EncryptedKeyVersion edek =
generateEncryptedDataEncryptionKey(ezKeyName);
EncryptionFaultInjector.getInstance().startFileAfterGenerateKey();
// Try to create the file with the computed cipher suite and EDEK
// Proceed with the create, using the computed cipher suite and
// generated EDEK
BlocksMapUpdateInfo toRemoveBlocks = null;
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -2557,17 +2548,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
} catch (StandbyException se) {
skipSync = true;
throw se;
} catch (RetryStartFileException e) {
shouldContinue = true;
if (LOG.isTraceEnabled()) {
LOG.trace("Preconditions failed, retrying creation of " +
"FileEncryptionInfo", e);
}
} finally {
writeUnlock();
}
}
} finally {
// There might be transactions logged while trying to recover the lease.
// They need to be sync'ed even when an exception was thrown.
if (!skipSync) {

View File

@ -17,5 +17,20 @@
*/
package org.apache.hadoop.hdfs.server.namenode;
public class RetryStartFileException extends Exception {
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
@InterfaceAudience.Private
public class RetryStartFileException extends IOException {
private static final long serialVersionUID = 1L;
public RetryStartFileException() {
super("Preconditions for creating a file failed because of a " +
"transient error, retry create later.");
}
public RetryStartFileException(String s) {
super(s);
}
}

View File

@ -940,7 +940,7 @@ public class TestEncryptionZones {
Future<?> future = executor.submit(new CreateFileTask(fsWrapper, file));
// Flip-flop between two EZs to repeatedly fail
for (int i=0; i<10; i++) {
for (int i=0; i<DFSOutputStream.CREATE_RETRY_COUNT+1; i++) {
injector.ready.await();
fsWrapper.delete(zone1, true);
fsWrapper.mkdir(zone1, FsPermission.getDirDefault(), true);